#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.core.teachers import DialogTeacher
from parlai.utils.io import PathManager
from parlai.utils.data import DatatypeHelper
from .build import build
import os
import pandas as pd
import hashlib


class MetalWozTeacher(DialogTeacher):
    """
    Teacher for the MetaLWOz dialogues.

    Every dialogue becomes one episode. The ``train`` and ``valid`` folds are
    both read from the ``train`` download and separated by a hash of the
    dialogue's domain (bucket 9 of 10 is ``valid``); any other fold is read
    from the ``test`` download without filtering.
    """

    def _path(self, opt):
        """
        Return ``(folder, fold)`` for the datatype in ``opt``.

        ``folder`` is ``<datapath>/metalwoz/train`` for the train and valid folds
        and ``<datapath>/metalwoz/test`` otherwise.
        """
        fold = DatatypeHelper.fold(opt['datatype'])
        if fold == 'train' or fold == 'valid':
            folder = os.path.join(opt['datapath'], 'metalwoz', 'train')
        else:
            folder = os.path.join(opt['datapath'], 'metalwoz', 'test')
        return folder, fold

    def __init__(self, opt, shared=None):
        """
        Build the dataset if this is not a shared copy, then set ``opt['datafile']``.

        ``opt['datafile']`` is set to ``<folder>/<fold>``. ``setup_data`` splits
        its ``datapath`` argument back into those two parts, so this value is
        expected to be what the parent class hands to it (verify against
        ``DialogTeacher``).
        """
        if shared is None:
            build(opt)
        folder, fold = self._path(opt)
        self.fold = fold
        opt['datafile'] = os.path.join(folder, fold)
        super().__init__(opt, shared)

    def _hash(self, string):
        """
        Map ``string`` to a bucket in ``0..9`` from its SHA-1 digest.

        The digest depends only on the UTF-8 bytes of ``string``, so the same
        string always lands in the same bucket.
        """
        return int(hashlib.sha1(string.encode('utf-8')).hexdigest(), 16) % 10

    def setup_data(self, datapath):
        """
        Yield ``(message, new_episode)`` pairs for the fold encoded in ``datapath``.

        :param datapath:
            ``<folder>/<fold>`` as produced in ``__init__``. ``<folder>`` must
            contain ``tasks.txt`` and a ``dialogues`` directory, both in JSON
            lines format.
        """
        folder, fold = os.path.split(datapath)
        with PathManager.open(os.path.join(folder, 'tasks.txt')) as taskf:
            tasks_table = pd.read_json(taskf, lines=True)

        dfolder = os.path.join(folder, 'dialogues')

        frames = []
        for filename in PathManager.ls(dfolder):
            fullfn = os.path.join(dfolder, filename)
            with PathManager.open(fullfn) as dataf:
                frames.append(pd.read_json(dataf, lines=True))

        data = pd.concat(frames, axis=0)
        data = data.sample(frac=1.0, random_state=83741)  # metal in l33t numbers, lol
        # 'domain_x' below is the dialogue table's column after the merge;
        # presumably tasks.txt carries its own 'domain' column, which is why
        # pandas adds the suffix.
        data = data.merge(tasks_table, on='task_id')
        data['fold'] = data['domain_x'].apply(self._hash)

        # Select the split once, up front, instead of re-testing the fold name
        # for every row inside the loop. A boolean mask keeps row order, so the
        # yielded episodes are the same as with per-row skipping.
        if fold == 'valid':
            data = data[data['fold'] == 9]
        elif fold == 'train':
            data = data[data['fold'] != 9]

        for _, row in data.iterrows():
            # The bot's role description is placed before the turns, then the
            # sequence is split alternately: even positions are the inputs,
            # odd positions are the responses to predict.
            texts = [row['bot_role']] + list(row['turns'])
            prompts, labels = texts[::2], texts[1::2]
            for i, (prompt, label) in enumerate(zip(prompts, labels)):
                yield {
                    'text': prompt,
                    'label': label,
                    'bot_role': row['bot_role'],
                    'bot_prompt': row['bot_prompt'],
                    'user_role': row['user_role'],
                    'user_prompt': row['user_prompt'],
                    'utterance_id': row['id'],
                    'domain': row['domain_x'],
                    'task_id': row['task_id'],
                }, i == 0


class DefaultTeacher(MetalWozTeacher):
    """
    Alias of ``MetalWozTeacher`` under the name ``DefaultTeacher``.
    """

    pass
import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Index 0 is downloaded into the train folder, index 1 into the test folder.
RESOURCES = [
    DownloadableFile(
        'https://download.microsoft.com/download/E/B/8/EB84CB1A-D57D-455F-B905-3ABDE80404E5/metalwoz-v1.zip',
        'metalwoz-v1.zip',
        '2a2ae3b25760aa2725e70bc6480562fa5d720c9689a508d28417631496d6764f',
    ),
    DownloadableFile(
        'https://download.microsoft.com/download/0/c/4/0c4a8893-cbf9-4a43-a44a-09bab9539234/metalwoz-test-v1.zip',
        'metalwoz-test-v1.zip',
        '6722d1d9ec05334dd801972767ae3bdefcd15f71bf73fea1d098f214a96a7c6c',
    ),
]


def build(opt):
    """
    Download MetaLWOz into ``<datapath>/metalwoz`` unless version 1.0 is already built.

    :param opt:
        options mapping; only ``opt['datapath']`` is read.
    """
    version = '1.0'
    root = os.path.join(opt['datapath'], 'metalwoz')

    # Nothing to do when this exact version is already marked as built.
    if build_data.built(root, version_string=version):
        return

    # Built, but not as this version: clear the directory before rebuilding.
    if build_data.built(root):
        build_data.remove_dir(root)
    build_data.make_dir(root)

    train_dir = os.path.join(root, 'train')
    test_dir = os.path.join(root, 'test')
    build_data.make_dir(os.path.join(train_dir, 'dialogues'))
    build_data.make_dir(os.path.join(test_dir, 'dialogues'))

    # Download the data.
    RESOURCES[0].download_file(train_dir)
    RESOURCES[1].download_file(test_dir)

    # The test folder needs a second extraction step for this archive
    # (presumably it is nested inside the downloaded test zip; confirm).
    build_data.untar(test_dir, 'dstc8_metalwoz_heldout.zip')
    build_data.mark_done(root, version_string=version)
+ +from parlai.utils.testing import AutoTeacherTest + + +# Teacher test for the "metalwoz" task; presumably checked by AutoTeacherTest against the test/metalwoz_{train,valid,test}.yml fixtures added alongside (confirm in parlai.utils.testing). +class TestDefaultTeacher(AutoTeacherTest): + task = "metalwoz" diff --git a/parlai/tasks/metalwoz/test/metalwoz_test.yml b/parlai/tasks/metalwoz/test/metalwoz_test.yml new file mode 100644 index 00000000000..abb9c9b56e9 --- /dev/null +++ b/parlai/tasks/metalwoz/test/metalwoz_test.yml @@ -0,0 +1,69 @@ +acts: +- - bot_prompt: Fulfil the user's request + bot_role: You are a bot that provides tourism related advice + domain: TOURISM + episode_done: false + eval_labels: + - Hello how may I help you? + id: metalwoz + task_id: 290f924c + text: You are a bot that provides tourism related advice + user_prompt: Tell the Bot that you are heading to Montreal in the summer, and + ask if there are any good festivals around that time + user_role: You are interacting with a bot that gives tourism related advice + utterance_id: b942824c +- - bot_prompt: Fulfil the user's request + bot_role: You are a bot that provides tourism related advice + domain: TOURISM + episode_done: false + eval_labels: + - I can assist you with your upcoming Montreal trip. What do you need? + id: metalwoz + task_id: 290f924c + text: I have some questions about my upcoming travel to Montreal. + user_prompt: Tell the Bot that you are heading to Montreal in the summer, and + ask if there are any good festivals around that time + user_role: You are interacting with a bot that gives tourism related advice + utterance_id: b942824c +- - bot_prompt: Fulfil the user's request + bot_role: You are a bot that provides tourism related advice + domain: TOURISM + episode_done: false + eval_labels: + - 'Here are a few festivals: BarnFest Steakfest Musicfest' + id: metalwoz + task_id: 290f924c + text: I'm heading there shortly during what is there summer months and was wondering + if there are good festivals going on then. 
+ user_prompt: Tell the Bot that you are heading to Montreal in the summer, and + ask if there are any good festivals around that time + user_role: You are interacting with a bot that gives tourism related advice + utterance_id: b942824c +- - bot_prompt: Fulfil the user's request + bot_role: You are a bot that provides tourism related advice + domain: TOURISM + episode_done: false + eval_labels: + - They are all occuring in July. + id: metalwoz + task_id: 290f924c + text: Those sound good. Do you have the dates they are happening? + user_prompt: Tell the Bot that you are heading to Montreal in the summer, and + ask if there are any good festivals around that time + user_role: You are interacting with a bot that gives tourism related advice + utterance_id: b942824c +- - bot_prompt: Fulfil the user's request + bot_role: You are a bot that provides tourism related advice + domain: TOURISM + episode_done: false + eval_labels: + - Sounds fun! Can I help you with anything else today? + id: metalwoz + task_id: 290f924c + text: That is when I plan on going. They all sound good to me. + user_prompt: Tell the Bot that you are heading to Montreal in the summer, and + ask if there are any good festivals around that time + user_role: You are interacting with a bot that gives tourism related advice + utterance_id: b942824c +num_episodes: 2319 +num_examples: 14067 diff --git a/parlai/tasks/metalwoz/test/metalwoz_train.yml b/parlai/tasks/metalwoz/test/metalwoz_train.yml new file mode 100644 index 00000000000..2214acfb1e4 --- /dev/null +++ b/parlai/tasks/metalwoz/test/metalwoz_train.yml @@ -0,0 +1,68 @@ +acts: +- - bot_prompt: Tell the user that there are no ski hills in their immediate location + bot_role: You are a bot that helps people book skiing trips + domain: SKI_BOT + episode_done: false + id: metalwoz + labels: + - Hello how may I help you? 
+ task_id: 2511cf64 + text: You are a bot that helps people book skiing trips + user_prompt: You want to know if there are good ski hills an hour's drive from + your current location + user_role: You are interacting with a bot designed to help you book a skiing trip + utterance_id: c3ec2179 +- - bot_prompt: Tell the user that there are no ski hills in their immediate location + bot_role: You are a bot that helps people book skiing trips + domain: SKI_BOT + episode_done: false + id: metalwoz + labels: + - There are no ski hills in your location + task_id: 2511cf64 + text: Are there any ski resorts me? + user_prompt: You want to know if there are good ski hills an hour's drive from + your current location + user_role: You are interacting with a bot designed to help you book a skiing trip + utterance_id: c3ec2179 +- - bot_prompt: Tell the user that there are no ski hills in their immediate location + bot_role: You are a bot that helps people book skiing trips + domain: SKI_BOT + episode_done: false + id: metalwoz + labels: + - In the mount le + task_id: 2511cf64 + text: What's the nearest ski resort? + user_prompt: You want to know if there are good ski hills an hour's drive from + your current location + user_role: You are interacting with a bot designed to help you book a skiing trip + utterance_id: c3ec2179 +- - bot_prompt: Tell the user that there are no ski hills in their immediate location + bot_role: You are a bot that helps people book skiing trips + domain: SKI_BOT + episode_done: false + id: metalwoz + labels: + - 4 hrs + task_id: 2511cf64 + text: How many hours way is that from me? 
+ user_prompt: You want to know if there are good ski hills an hour's drive from + your current location + user_role: You are interacting with a bot designed to help you book a skiing trip + utterance_id: c3ec2179 +- - bot_prompt: Tell the user that there are no ski hills in their immediate location + bot_role: You are a bot that helps people book skiing trips + domain: SKI_BOT + episode_done: false + id: metalwoz + labels: + - is there anything else? + task_id: 2511cf64 + text: Okay thanks + user_prompt: You want to know if there are good ski hills an hour's drive from + your current location + user_role: You are interacting with a bot designed to help you book a skiing trip + utterance_id: c3ec2179 +num_episodes: 31677 +num_examples: 194324 diff --git a/parlai/tasks/metalwoz/test/metalwoz_valid.yml b/parlai/tasks/metalwoz/test/metalwoz_valid.yml new file mode 100644 index 00000000000..a29b7debbcb --- /dev/null +++ b/parlai/tasks/metalwoz/test/metalwoz_valid.yml @@ -0,0 +1,73 @@ +acts: +- - bot_prompt: Reply to the customer and try to fulfil their request. If you think + the request they are making goes beyond your role, inform the user that you + are not equipped to help them + bot_role: You are a bot that clarifies the rules for games + domain: GAME_RULES + episode_done: false + eval_labels: + - Hello how may I help you? hello. + id: metalwoz + task_id: a5137c64 + text: You are a bot that clarifies the rules for games + user_prompt: Start a conversation based on you customerRole + user_role: You are interacting with a bot that clarifies the rules of games + utterance_id: 194e1958 +- - bot_prompt: Reply to the customer and try to fulfil their request. If you think + the request they are making goes beyond your role, inform the user that you + are not equipped to help them + bot_role: You are a bot that clarifies the rules for games + domain: GAME_RULES + episode_done: false + eval_labels: + - Yes sure. What game ? 
+ id: metalwoz + task_id: a5137c64 + text: Hi, can you help me with a game? + user_prompt: Start a conversation based on you customerRole + user_role: You are interacting with a bot that clarifies the rules of games + utterance_id: 194e1958 +- - bot_prompt: Reply to the customer and try to fulfil their request. If you think + the request they are making goes beyond your role, inform the user that you + are not equipped to help them + bot_role: You are a bot that clarifies the rules for games + domain: GAME_RULES + episode_done: false + eval_labels: + - No there isn't. + id: metalwoz + task_id: a5137c64 + text: Okay, I need to know if theres a rule for who goes first in checkers + user_prompt: Start a conversation based on you customerRole + user_role: You are interacting with a bot that clarifies the rules of games + utterance_id: 194e1958 +- - bot_prompt: Reply to the customer and try to fulfil their request. If you think + the request they are making goes beyond your role, inform the user that you + are not equipped to help them + bot_role: You are a bot that clarifies the rules for games + domain: GAME_RULES + episode_done: false + eval_labels: + - Yes as long as you take move. + id: metalwoz + task_id: a5137c64 + text: So it doesn't matter who goes first? + user_prompt: Start a conversation based on you customerRole + user_role: You are interacting with a bot that clarifies the rules of games + utterance_id: 194e1958 +- - bot_prompt: Reply to the customer and try to fulfil their request. If you think + the request they are making goes beyond your role, inform the user that you + are not equipped to help them + bot_role: You are a bot that clarifies the rules for games + domain: GAME_RULES + episode_done: false + eval_labels: + - Yes probably. 
+ id: metalwoz + task_id: a5137c64 + text: Okay, I guess we'll just take turns then + user_prompt: Start a conversation based on you customerRole + user_role: You are interacting with a bot that clarifies the rules of games + utterance_id: 194e1958 +num_episodes: 6207 +num_examples: 37545 diff --git a/parlai/tasks/task_list.py b/parlai/tasks/task_list.py index 9877ca53270..82b55536c02 100644 --- a/parlai/tasks/task_list.py +++ b/parlai/tasks/task_list.py @@ -1350,4 +1350,18 @@ "description": "Task for detect whether the last utterance contradicts previous dialogue history.", "links": {"arXiv": "https://arxiv.org/abs/2012.13391"}, }, + { + "id": "metalwoz", + "display_name": "MetaLWOz", + "task": "metalwoz", + "tags": ["Goal"], + "description": ( + "Meta-Learning Wizard-of-Oz (MetaLWOz) is a dataset designed to help " + "develop models capable of predicting user responses in unseen domains." + ), + "links": { + "paper": "http://workshop.colips.org/dstc7/dstc8/DTSC8_multidomain_task_proposal.pdf", + "website": "https://www.microsoft.com/en-us/research/project/metalwoz/", + }, + }, ]