This repository has been archived by the owner on Nov 3, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add to task_list * Add proper test set. * Lol * Lint.
- Loading branch information
1 parent
2426d74
commit a72ef5b
Showing
8 changed files
with
358 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
from parlai.core.teachers import DialogTeacher | ||
from parlai.utils.io import PathManager | ||
from parlai.utils.data import DatatypeHelper | ||
from .build import build | ||
import os | ||
import pandas as pd | ||
import hashlib | ||
|
||
|
||
class MetalWozTeacher(DialogTeacher):
    """
    Teacher that serves MetalWoz dialogues one bot turn at a time.

    The 'train' and 'valid' folds are both read from the ``train`` folder and
    separated by a hash of each dialogue's domain; every other fold is read
    from the ``test`` folder without filtering.
    """

    # Hash bucket (out of 10) whose domains are held out as the 'valid' fold.
    _VALID_BUCKET = 9

    def _path(self, opt):
        """
        Return ``(folder, fold)`` for the datatype given in ``opt``.

        'train' and 'valid' share the same folder on disk.
        """
        fold = DatatypeHelper.fold(opt['datatype'])
        subdir = 'train' if fold in ('train', 'valid') else 'test'
        folder = os.path.join(opt['datapath'], 'metalwoz', subdir)
        return folder, fold

    def __init__(self, opt, shared=None):
        if shared is None:
            build(opt)
        folder, fold = self._path(opt)
        self.fold = fold
        # 'datafile' packs the folder and the fold name into a single path;
        # setup_data() splits them apart again with os.path.split.
        opt['datafile'] = os.path.join(folder, fold)
        super().__init__(opt, shared)

    def _hash(self, string):
        """
        Map ``string`` to a bucket in ``range(10)``.

        SHA-1 is used (rather than the builtin ``hash``) only as a stable
        bucketing function, not for any security purpose.
        """
        return int(hashlib.sha1(string.encode('utf-8')).hexdigest(), 16) % 10

    def setup_data(self, datapath):
        """
        Yield ``(message, new_episode)`` pairs for the requested fold.

        :param datapath:
            path of the form ``<folder>/<fold>`` as assembled in ``__init__``.
        """
        folder, fold = os.path.split(datapath)
        with PathManager.open(os.path.join(folder, 'tasks.txt')) as taskf:
            tasks_table = pd.read_json(taskf, lines=True)

        dfolder = os.path.join(folder, 'dialogues')

        frames = []
        # Directory listings come back in no guaranteed order. Sort them so
        # the fixed-seed shuffle below yields the same order on every machine.
        for filename in sorted(PathManager.ls(dfolder)):
            fullfn = os.path.join(dfolder, filename)
            with PathManager.open(fullfn) as dataf:
                frames.append(pd.read_json(dataf, lines=True))

        data = pd.concat(frames, axis=0)
        data = data.sample(frac=1.0, random_state=83741)  # metal in l33t numbers, lol
        # Both tables carry a 'domain' column, so after the merge the dialogue
        # table's copy is exposed as 'domain_x'.
        data = data.merge(tasks_table, on='task_id')
        data['fold'] = data['domain_x'].apply(self._hash)

        # Split train/valid by domain bucket; other folds keep every row.
        if fold == 'valid':
            data = data[data['fold'] == self._VALID_BUCKET]
        elif fold == 'train':
            data = data[data['fold'] != self._VALID_BUCKET]

        for _, row in data.iterrows():
            # Prepend the bot role so that even-indexed entries become the
            # prompts and odd-indexed entries become the labels.
            texts = [row['bot_role']] + list(row['turns'])
            prompts, labels = texts[::2], texts[1::2]
            for i, (prompt, label) in enumerate(zip(prompts, labels)):
                message = {
                    'text': prompt,
                    'label': label,
                    'bot_role': row['bot_role'],
                    'bot_prompt': row['bot_prompt'],
                    'user_role': row['user_role'],
                    'user_prompt': row['user_prompt'],
                    'utterance_id': row['id'],
                    'domain': row['domain_x'],
                    'task_id': row['task_id'],
                }
                # The second element flags the first turn of each dialogue.
                yield message, i == 0
|
||
|
||
class DefaultTeacher(MetalWozTeacher):
    """
    Plain subclass of MetalWozTeacher that adds no behavior of its own.

    NOTE(review): presumably the class the framework picks up when the task is
    requested without naming a specific teacher -- confirm against the loader.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import parlai.core.build_data as build_data | ||
import os | ||
from parlai.core.build_data import DownloadableFile | ||
|
||
# Archive downloaded into the 'train' directory by build().
_TRAIN_ARCHIVE = DownloadableFile(
    'https://download.microsoft.com/download/E/B/8/EB84CB1A-D57D-455F-B905-3ABDE80404E5/metalwoz-v1.zip',
    'metalwoz-v1.zip',
    '2a2ae3b25760aa2725e70bc6480562fa5d720c9689a508d28417631496d6764f',
)

# Archive downloaded into the 'test' directory by build().
_TEST_ARCHIVE = DownloadableFile(
    'https://download.microsoft.com/download/0/c/4/0c4a8893-cbf9-4a43-a44a-09bab9539234/metalwoz-test-v1.zip',
    'metalwoz-test-v1.zip',
    '6722d1d9ec05334dd801972767ae3bdefcd15f71bf73fea1d098f214a96a7c6c',
)

# Order matters: build() addresses these entries by index.
RESOURCES = [_TRAIN_ARCHIVE, _TEST_ARCHIVE]
|
||
|
||
def build(opt):
    """
    Prepare the ``metalwoz`` data directory under ``opt['datapath']``.

    Does nothing when the directory is already marked as built at the current
    version. A directory built at a different version is removed first.
    """
    version = '1.0'
    root = os.path.join(opt['datapath'], 'metalwoz')

    if build_data.built(root, version_string=version):
        return

    # A stale build from another version: start from a clean directory.
    if build_data.built(root):
        build_data.remove_dir(root)
    build_data.make_dir(root)

    train_dir = os.path.join(root, 'train')
    test_dir = os.path.join(root, 'test')
    build_data.make_dir(os.path.join(train_dir, 'dialogues'))
    build_data.make_dir(os.path.join(test_dir, 'dialogues'))

    # Download the data.
    RESOURCES[0].download_file(train_dir)
    RESOURCES[1].download_file(test_dir)

    # NOTE(review): presumably the test download contains this nested archive,
    # which needs its own extraction step -- confirm against the download.
    build_data.untar(test_dir, 'dstc8_metalwoz_heldout.zip')
    build_data.mark_done(root, version_string=version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
from parlai.utils.testing import AutoTeacherTest | ||
|
||
|
||
class TestDefaultTeacher(AutoTeacherTest):
    """
    AutoTeacherTest case configured for the ``metalwoz`` task.
    """

    task = "metalwoz"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
acts: | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- Hello how may I help you? | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: You are a bot that provides tourism related advice | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- I can assist you with your upcoming Montreal trip. What do you need? | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: I have some questions about my upcoming travel to Montreal. | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- 'Here are a few festivals: BarnFest Steakfest Musicfest' | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: I'm heading there shortly during what is there summer months and was wondering | ||
if there are good festivals going on then. | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- They are all occuring in July. | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: Those sound good. Do you have the dates they are happening? | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
- - bot_prompt: Fulfil the user's request | ||
bot_role: You are a bot that provides tourism related advice | ||
domain: TOURISM | ||
episode_done: false | ||
eval_labels: | ||
- Sounds fun! Can I help you with anything else today? | ||
id: metalwoz | ||
task_id: 290f924c | ||
text: That is when I plan on going. They all sound good to me. | ||
user_prompt: Tell the Bot that you are heading to Montreal in the summer, and | ||
ask if there are any good festivals around that time | ||
user_role: You are interacting with a bot that gives tourism related advice | ||
utterance_id: b942824c | ||
num_episodes: 2319 | ||
num_examples: 14067 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
acts: | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- Hello how may I help you? | ||
task_id: 2511cf64 | ||
text: You are a bot that helps people book skiing trips | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- There are no ski hills in your location | ||
task_id: 2511cf64 | ||
text: Are there any ski resorts me? | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- In the mount le | ||
task_id: 2511cf64 | ||
text: What's the nearest ski resort? | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- 4 hrs | ||
task_id: 2511cf64 | ||
text: How many hours way is that from me? | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
- - bot_prompt: Tell the user that there are no ski hills in their immediate location | ||
bot_role: You are a bot that helps people book skiing trips | ||
domain: SKI_BOT | ||
episode_done: false | ||
id: metalwoz | ||
labels: | ||
- is there anything else? | ||
task_id: 2511cf64 | ||
text: Okay thanks | ||
user_prompt: You want to know if there are good ski hills an hour's drive from | ||
your current location | ||
user_role: You are interacting with a bot designed to help you book a skiing trip | ||
utterance_id: c3ec2179 | ||
num_episodes: 31677 | ||
num_examples: 194324 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
acts: | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Hello how may I help you? hello. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: You are a bot that clarifies the rules for games | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Yes sure. What game ? | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: Hi, can you help me with a game? | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- No there isn't. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: Okay, I need to know if theres a rule for who goes first in checkers | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Yes as long as you take move. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: So it doesn't matter who goes first? | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
- - bot_prompt: Reply to the customer and try to fulfil their request. If you think | ||
the request they are making goes beyond your role, inform the user that you | ||
are not equipped to help them | ||
bot_role: You are a bot that clarifies the rules for games | ||
domain: GAME_RULES | ||
episode_done: false | ||
eval_labels: | ||
- Yes probably. | ||
id: metalwoz | ||
task_id: a5137c64 | ||
text: Okay, I guess we'll just take turns then | ||
user_prompt: Start a conversation based on you customerRole | ||
user_role: You are interacting with a bot that clarifies the rules of games | ||
utterance_id: 194e1958 | ||
num_episodes: 6207 | ||
num_examples: 37545 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters