Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Wizard of Wikipedia, knowledge source page title. #3845

Merged
merged 6 commits into from
Jul 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions parlai/tasks/wizard_of_wikipedia/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import copy
from parlai.core.teachers import FixedDialogTeacher, MultiTaskTeacher
from parlai.utils.io import PathManager
from parlai.utils import logging
from parlai.utils.misc import warn_once
from .build import build

Expand Down Expand Up @@ -757,6 +758,113 @@ def get(self, episode_idx, entry_idx=0):
return a


class WikiPageTitleTeacher(WizardDialogKnowledgeTeacher):
"""
Generates the title of the Wikipedia page used as the source of knowledge.

The context provided by this teacher (`text`) is the conversation history, with the chosen topic removed.
The label is the title of the Wikipedia page containing the passage that the wizard selected for crafting
the next utterance; in other words, the source of knowledge for this utterance.
"""

def __init__(self, opt, shared=None):
    """
    Set up the teacher from a private copy of ``opt``.

    The copy has ``label_type`` forced to ``'response'`` before it is handed
    to the parent constructor. The ``conversation_history_length`` and
    ``skip_no_title`` options are then read from it. When ``shared`` is falsy
    the title examples are built from scratch; otherwise they are taken from
    ``shared['titles_data']``.
    """
    self.opt = copy.deepcopy(opt)
    self.opt['label_type'] = 'response'
    super().__init__(self.opt, shared=shared)
    self.id = 'WikiPageTitleTeacher'

    # Only a positive length or the sentinel -1 is accepted; anything else
    # is reported and replaced by -1.
    self._conv_history_len = self.opt['conversation_history_length']
    length_is_valid = self._conv_history_len > 0 or self._conv_history_len == -1
    if not length_is_valid:
        logging.warning(
            f'"{self._conv_history_len}" is an invalid value for --conversation-history-length flag.'
            ' Changing it to default of -1 (include the entire message history).'
        )
        self._conv_history_len = -1

    self._skip_no_title = self.opt['skip_no_title']

    if shared:
        # Reuse the examples already computed by the sharing teacher.
        self.titles_data = shared['titles_data']
    else:
        self._preprocess_data()

@classmethod
def add_cmdline_args(cls, parser, partial_opt=None):
super().add_cmdline_args(parser, partial_opt=partial_opt)
agent = parser.add_argument_group('Wikipedia Page Title Arguments')
agent.add_argument(
'--conversation-history-length',
type=int,
default=-1,
help='Number of previous utterances to keep in context, -1 (default) includes all',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we make the default -1? and have that map to including all? making 0 mean includes all doesn't seem right (as one could argue this should include... none)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed default to -1, and added something to change any non-positive value to that.

)
agent.add_argument(
'--skip-no-title',
type='bool',
default=True,
help=(
'Whether to skip the example if no passage was selected. If `false` '
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i wonder if this teacher would be more useful with this flag default to True, since we really want to train a model that can generate titles (not just generate TOKEN_NOCHOSEN). Anyway that's up to you

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, that might make more sense.

f'uses `{TOKEN_NOCHOSEN}` instead of title if no knowledge source was selected.'
),
)
return parser

def share(self):
    """
    Return the parent's shared state with this teacher's title examples added.

    The examples are stored under the ``'titles_data'`` key, which is the key
    ``__init__`` reads when it is given a ``shared`` object.
    """
    shared_state = super().share()
    shared_state['titles_data'] = self.titles_data
    return shared_state

def _generate_messages(self, hist, action):
    """
    Build one title-generation example.

    :param hist: list of utterance strings seen so far.
    :param action: mapping whose ``"title"`` entry becomes the label.
    :return: a ``Message`` whose ``text`` is the newline-joined history and
        whose ``labels`` holds the title; ``episode_done`` is always ``True``.
    """
    max_turns = self._conv_history_len
    if max_turns > 0:
        # Keep only the most recent ``max_turns`` utterances.
        kept_turns = hist[-max_turns:]
    else:
        # Non-positive length (the -1 sentinel): keep the whole history.
        kept_turns = hist

    return Message(
        {
            'id': "Wikipedia Title Teacher",
            'text': '\n'.join(kept_turns),
            'labels': [action["title"]],
            'episode_done': True,
        }
    )

def _should_include(self, act):
return not (self._skip_no_title and act['labels'][0] == TOKEN_NOCHOSEN)

def _preprocess_data(self):
    """
    Walk every parent episode and flatten it into single-turn title examples.

    For each parent example the first line of its ``text`` is appended to a
    running history; on the first example of an episode the leading line
    (the chosen topic) is dropped first. One message is built per parent
    example via ``_generate_messages`` and kept if ``_should_include``
    accepts it. The result is stored in ``self.titles_data``.
    """
    examples = []
    for ep_idx in range(super().num_episodes()):
        history = []
        turn_idx = 0
        finished = False
        while not finished:
            parent_act = super().get(ep_idx, turn_idx)
            lines = parent_act['text'].split('\n')
            if turn_idx == 0:
                # throwing away chosen_topic
                lines = lines[1:]
            if lines:
                history.append(lines[0])

            candidate = self._generate_messages(history, parent_act)
            if self._should_include(candidate):
                examples.append(candidate)

            finished = parent_act['episode_done']
            if not finished:
                turn_idx += 1
                # The parent's label becomes part of the context for the
                # next turn.
                history.append(parent_act['labels'][0])

    logging.info(
        f'{len(examples)} title generation examples generated '
        f'from {super().num_examples()} original examples'
    )
    self.titles_data = examples

def num_episodes(self):
    """
    Return the number of generated title examples.
    """
    episode_count = len(self.titles_data)
    return episode_count

def num_examples(self):
    """
    Return the number of examples, which is the value of ``num_episodes()``.
    """
    example_count = self.num_episodes()
    return example_count

def get(self, episode_idx, entry_idx=0):
    """
    Return the stored title example at position ``episode_idx``.

    ``entry_idx`` is accepted but not used by this method.
    """
    example = self.titles_data[episode_idx]
    return example


####################################################
# #
# Doc Reader Teachers #
Expand Down
4 changes: 4 additions & 0 deletions parlai/tasks/wizard_of_wikipedia/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,9 @@ class TestGeneratorTeacher(AutoTeacherTest):
task = "wizard_of_wikipedia:generator"


class TestWikiPageTitleTeacher(AutoTeacherTest):
    """
    Points ``AutoTeacherTest`` at the ``wizard_of_wikipedia:wiki_page_title`` task.
    """

    task = "wizard_of_wikipedia:wiki_page_title"


class TestDocreaderTeacher(AutoTeacherTest):
task = "wizard_of_wikipedia:docreader"
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
acts:
- - episode_done: true
eval_labels:
- Royal Blue (train)
id: WikiPageTitleTeacher
text: 'Blue is my favorite primary color.

Blue is always nice. I like royal blue.'
- - episode_done: true
eval_labels:
- Blue Skies (1946 film)
id: WikiPageTitleTeacher
text: 'Blue is my favorite primary color.

Blue is always nice. I like royal blue.

I once road on The Royal Blue train from New York to D.C

Oh that sounds really nice. I bet there was a lot of scenery and blue skies.'
- - episode_done: true
eval_labels:
- Cinematography
id: WikiPageTitleTeacher
text: Hi buddy, What you think about cinematography
- - episode_done: true
eval_labels:
- Cinematography
id: WikiPageTitleTeacher
text: "Hi buddy, What you think about cinematography\nCinematography,is a type\
\ of motion picture , captured electronically by means of an image \nYes buddy,\
\ Images captured with an electronic image-sensor, produces an electrical charge.The\
\ word \"cinematography\" is based on the Greek words meaning movement, motion."
- - episode_done: true
eval_labels:
- Photography
id: WikiPageTitleTeacher
text: "Hi buddy, What you think about cinematography\nCinematography,is a type\
\ of motion picture , captured electronically by means of an image \nYes buddy,\
\ Images captured with an electronic image-sensor, produces an electrical charge.The\
\ word \"cinematography\" is based on the Greek words meaning movement, motion.\n\
It works by lens used to repeatedly focus the light reflected from objects into\
\ real images on the light-sensitive surface .\n Muybridge sequence of a horse\
\ galloping In the 1830s, moving images were produced on revolving drums and\
\ disks"
num_episodes: 3181
num_examples: 3181
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
acts:
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Science fiction film
text: 'I think science fiction is an amazing genre for anything. Future science,
technology, time travel, FTL travel, they''re all such interesting concepts.

I''m a huge fan of science fiction myself! '
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Time travel in fiction
text: "I think science fiction is an amazing genre for anything. Future science,\
\ technology, time travel, FTL travel, they're all such interesting concepts.\n\
I'm a huge fan of science fiction myself! \nAwesome! I really love how sci-fi\
\ storytellers focus on political/social/philosophical issues that would still\
\ be around even in the future. Makes them relatable.\nI agree. One of my favorite\
\ forms of science fiction is anything related to time travel! I find it fascinating."
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Science fiction
text: "I think science fiction is an amazing genre for anything. Future science,\
\ technology, time travel, FTL travel, they're all such interesting concepts.\n\
I'm a huge fan of science fiction myself! \nAwesome! I really love how sci-fi\
\ storytellers focus on political/social/philosophical issues that would still\
\ be around even in the future. Makes them relatable.\nI agree. One of my favorite\
\ forms of science fiction is anything related to time travel! I find it fascinating.\n\
It's not quite sci-fi, but my favorite version of time travel is in Harry Potter\
\ and the Prisoner of Azkaban. Breaks zero logical rules.\nAnd that's difficult\
\ to do when dealing with time travel. I actually haven't seen the latest Harry\
\ Potter movies. Guess it's time to check them out!"
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Internet access
text: 'Can you imagine the world without internet access? '
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Internet access
text: "Can you imagine the world without internet access? \nNo I could not! I\
\ couldn't imagine living when internet access was rare and very few people\
\ had it!\nOh me either! It seems like such a long time ago. I wonder when Internet\
\ was first created?"
num_episodes: 60797
num_examples: 60797
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
acts:
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: I like Gardening, even when I've only been doing it for a short time.
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: 'I like Gardening, even when I''ve only been doing it for a short time.

I live on a farm, we garden all year long, it is very relaxing.

That sounds great. I''ve always thought that I would love living in a farm,
but I;ve always lived in the city. What do you mostly plant?'
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: 'I like Gardening, even when I''ve only been doing it for a short time.

I live on a farm, we garden all year long, it is very relaxing.

That sounds great. I''ve always thought that I would love living in a farm,
but I;ve always lived in the city. What do you mostly plant?

I have planted several fruits trees, tomatoes, jalepenos, bell peppers, onions,
Garlic, and potatoes mostly.

Great, I love the idea of growing my own vegetables and fruits! Do you have
animals in the farm?'
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: 'I like Gardening, even when I''ve only been doing it for a short time.

I live on a farm, we garden all year long, it is very relaxing.

That sounds great. I''ve always thought that I would love living in a farm,
but I;ve always lived in the city. What do you mostly plant?

I have planted several fruits trees, tomatoes, jalepenos, bell peppers, onions,
Garlic, and potatoes mostly.

Great, I love the idea of growing my own vegetables and fruits! Do you have
animals in the farm?

yes i do. Cows, chickens, Micro pigs, Guinneas, We also do forest growing also.
we plants large pine trees.

Wow, it sounds amazing, the Micro-pigs are so cute! are they trainable to be
well behaved?'
- - episode_done: true
eval_labels:
- Bob Ross
id: WikiPageTitleTeacher
text: I would like to know more about bob ross
num_episodes: 3236
num_examples: 3236