This repository has been archived by the owner on Nov 3, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
[teacher] MetaLWoZ #3583
Merged
Merged
[teacher] MetaLWoZ #3583
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
from parlai.core.teachers import DialogTeacher | ||
from parlai.utils.io import PathManager | ||
from parlai.utils.data import DatatypeHelper | ||
from .build import build | ||
import os | ||
import pandas as pd | ||
import hashlib | ||
|
||
|
||
class MetalWozTeacher(DialogTeacher):
    """
    Teacher for the MetaLWOz dialogue data.

    Train and valid examples are both read from the ``train`` folder and are
    separated by hashing each dialogue's domain into one of ten buckets:
    bucket 9 is valid, every other bucket is train. Test examples are read
    from the ``test`` folder and are not filtered.
    """

    def _path(self, opt):
        """
        Return ``(folder, fold)`` for the datatype requested in ``opt``.

        ``folder`` is the ``train`` directory for the train and valid folds
        and the ``test`` directory for anything else.
        """
        fold = DatatypeHelper.fold(opt['datatype'])
        if fold == 'train' or fold == 'valid':
            folder = os.path.join(opt['datapath'], 'metalwoz', 'train')
        else:
            folder = os.path.join(opt['datapath'], 'metalwoz', 'test')
        return folder, fold

    def __init__(self, opt, shared=None):
        if shared is None:
            build(opt)
        folder, fold = self._path(opt)
        self.fold = fold
        # The fold name rides along as the last component of datafile;
        # setup_data() splits it back off to recover (folder, fold).
        opt['datafile'] = os.path.join(folder, fold)
        super().__init__(opt, shared)

    def _hash(self, string):
        """
        Map ``string`` to an integer bucket in ``range(10)``.

        Uses SHA-1 of the UTF-8 encoded string, so the bucket is the same on
        every run and every machine (unlike the builtin ``hash``).
        """
        return int(hashlib.sha1(string.encode('utf-8')).hexdigest(), 16) % 10

    def setup_data(self, datapath):
        """
        Yield ``(message, new_episode)`` pairs for the requested fold.

        :param datapath:
            the data folder joined with the fold name, as assembled in
            ``__init__``.
        """
        folder, fold = os.path.split(datapath)
        with PathManager.open(os.path.join(folder, 'tasks.txt')) as taskf:
            tasks_table = pd.read_json(taskf, lines=True)

        dfolder = os.path.join(folder, 'dialogues')

        data = []

        # Sort the listing: the seeded shuffle below permutes rows by
        # position, so it is only reproducible if the files are concatenated
        # in the same order everywhere. Directory listings carry no ordering
        # guarantee. NOTE(review): this can change example order relative to
        # previously recorded regression fixtures.
        for filename in sorted(PathManager.ls(dfolder)):
            fullfn = os.path.join(dfolder, filename)
            with PathManager.open(fullfn) as dataf:
                data.append(pd.read_json(dataf, lines=True))

        data = pd.concat(data, axis=0)
        data = data.sample(frac=1.0, random_state=83741)  # metal in l33t numbers, lol
        # Presumably both tables carry a ``domain`` column, which is why the
        # dialogue-side one is addressed as ``domain_x`` after the merge.
        data = data.merge(tasks_table, on='task_id')
        data['fold'] = data['domain_x'].apply(self._hash)

        # Bucket 9 is held out as valid; train keeps the rest. Any other fold
        # (i.e. test) uses every row of its own folder.
        if fold == 'valid':
            data = data[data['fold'] == 9]
        elif fold == 'train':
            data = data[data['fold'] != 9]

        for _, row in data.iterrows():
            # The bot role description is the first "utterance", so even
            # positions are prompts and odd positions are the labels. zip()
            # drops a trailing prompt that has no reply.
            texts = [row['bot_role']] + list(row['turns'])
            prompts, labels = texts[::2], texts[1::2]
            for i, (prompt, label) in enumerate(zip(prompts, labels)):
                yield {
                    'text': prompt,
                    'label': label,
                    'bot_role': row['bot_role'],
                    'bot_prompt': row['bot_prompt'],
                    'user_role': row['user_role'],
                    'user_prompt': row['user_prompt'],
                    'utterance_id': row['id'],
                    'domain': row['domain_x'],
                    'task_id': row['task_id'],
                }, i == 0
|
||
|
||
class DefaultTeacher(MetalWozTeacher):
    """
    Default teacher for this task; identical to ``MetalWozTeacher``.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import parlai.core.build_data as build_data | ||
import os | ||
from parlai.core.build_data import DownloadableFile | ||
|
||
# Each entry is (download URL, local file name, expected checksum).
# The checksum is presumably SHA-256, given its 64 hex digits.

# Archive that build() downloads into the ``train`` folder.
_TRAIN_ARCHIVE = DownloadableFile(
    'https://download.microsoft.com/download/E/B/8/EB84CB1A-D57D-455F-B905-3ABDE80404E5/metalwoz-v1.zip',
    'metalwoz-v1.zip',
    '2a2ae3b25760aa2725e70bc6480562fa5d720c9689a508d28417631496d6764f',
)

# Archive that build() downloads into the ``test`` folder.
_TEST_ARCHIVE = DownloadableFile(
    'https://download.microsoft.com/download/0/c/4/0c4a8893-cbf9-4a43-a44a-09bab9539234/metalwoz-test-v1.zip',
    'metalwoz-test-v1.zip',
    '6722d1d9ec05334dd801972767ae3bdefcd15f71bf73fea1d098f214a96a7c6c',
)

# Order matters: build() addresses these by index (0 = train, 1 = test).
RESOURCES = [_TRAIN_ARCHIVE, _TEST_ARCHIVE]
|
||
|
||
def build(opt):
    """
    Download and unpack the MetaLWOz archives under ``opt['datapath']``.

    Does nothing when the data directory is already marked as built with the
    current version string. A directory built under a different version is
    removed and rebuilt from scratch.
    """
    root = os.path.join(opt['datapath'], 'metalwoz')
    version = '1.0'

    if build_data.built(root, version_string=version):
        return

    # Built, but not at this version: discard the stale copy first.
    if build_data.built(root):
        build_data.remove_dir(root)
    build_data.make_dir(root)

    train_dir = os.path.join(root, 'train')
    test_dir = os.path.join(root, 'test')
    for split_dir in (train_dir, test_dir):
        build_data.make_dir(os.path.join(split_dir, 'dialogues'))

    # Download the data.
    RESOURCES[0].download_file(train_dir)
    RESOURCES[1].download_file(test_dir)

    # Presumably the test download yields a nested archive that needs a
    # second unpacking step.
    build_data.untar(test_dir, 'dstc8_metalwoz_heldout.zip')
    build_data.mark_done(root, version_string=version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
from parlai.utils.testing import AutoTeacherTest | ||
|
||
|
||
class TestDefaultTeacher(AutoTeacherTest):
    """
    Run the ``AutoTeacherTest`` checks against the ``metalwoz`` task.
    """

    task = "metalwoz"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
acts: | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- Hello how may I help you? | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: You are a bot that provides tourism related advice | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- I can assist you with your upcoming Montreal trip. What do you need? | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: I have some questions about my upcoming travel to Montreal. | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- 'Here are a few festivals: BarnFest Steakfest Musicfest' | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: I'm heading there shortly during what is there summer months and was wondering | ||
if there are good festivals going on then. | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- They are all occuring in July. | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: Those sound good. Do you have the dates they are happening? | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- Sounds fun! Can I help you with anything else today? | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: That is when I plan on going. They all sound good to me. | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
num_episodes: 2319 | ||
num_examples: 14067 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
acts: | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- Hello how may I help you? | ||
task_id: 2511cf64 | ||
text: You are a bot that helps people book skiing trips | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- There are no ski hills in your location | ||
task_id: 2511cf64 | ||
text: Are there any ski resorts me? | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- In the mount le | ||
task_id: 2511cf64 | ||
text: What's the nearest ski resort? | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- 4 hrs | ||
task_id: 2511cf64 | ||
text: How many hours way is that from me? | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- is there anything else? | ||
task_id: 2511cf64 | ||
text: Okay thanks | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
num_episodes: 31677 | ||
num_examples: 194324 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
acts: | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Hello how may I help you? hello. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: You are a bot that clarifies the rules for games | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Yes sure. What game ? | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: Hi, can you help me with a game? | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- No there isn't. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: Okay, I need to know if theres a rule for who goes first in checkers | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Yes as long as you take move. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: So it doesn't matter who goes first? | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Yes probably. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: Okay, I guess we'll just take turns then | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
num_episodes: 6207 | ||
num_examples: 37545 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
...lol