diff --git a/parlai/tasks/taskmaster2/README.md b/parlai/tasks/taskmaster2/README.md index ac138930708..a64ce6fd321 100644 --- a/parlai/tasks/taskmaster2/README.md +++ b/parlai/tasks/taskmaster2/README.md @@ -1,5 +1,5 @@ # Taskmaster 2 Originally from the -[Google Research Datasets](https://github.com/google-research-datasets/Taskmaster/blob/main/TM-2-2020/README.md). +[Google Research Datasets](https://github.com/google-research-datasets/Taskmaster/blob/master/TM-2-2020/README.md). See that page for details. diff --git a/parlai/tasks/taskmaster2/agents.py b/parlai/tasks/taskmaster2/agents.py index ce761afc47c..f5cb8d81fa0 100644 --- a/parlai/tasks/taskmaster2/agents.py +++ b/parlai/tasks/taskmaster2/agents.py @@ -14,36 +14,31 @@ from parlai.core.params import ParlaiParser import os import pandas as pd -import hashlib from collections import Counter from parlai.core.opt import Opt -from parlai.core.teachers import DialogTeacher -from parlai.core.metrics import AverageMetric, F1Metric, BleuMetric +import parlai.core.tod.tod_core as tod from parlai.utils.misc import warn_once import json -import parlai.utils.logging as logging -from typing import Optional, Tuple -from parlai.core.message import Message +from typing import Optional +from parlai.utils.data import DatatypeHelper from parlai.utils.io import PathManager import parlai.tasks.taskmaster2.build as build_ +import parlai.core.tod.tod_agents as tod_agents + DOMAINS = [ - 'flights', - 'food-ordering', - 'hotels', - 'movies', - 'restaurant-search', - 'sports', - 'music', + "flights", + "food-ordering", + "hotels", + "movies", + "restaurant-search", + "sports", + "music", ] -ONTO_TOKEN = "Onto:" -CALL_TOKEN = "Call:" -RESP_TOKEN = "Result:" - -class _Abstract(DialogTeacher): +class Taskmaster2Parser(tod_agents.TodStructuredDataParser): """ Abstract data loader. """ @@ -52,21 +47,26 @@ class _Abstract(DialogTeacher): def add_cmdline_args( cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None ) -> ParlaiParser: - super().add_cmdline_args(parser, partial_opt) - parser.add_argument('--include-ontology', type=bool, default=False) parser.add_argument( - '--domains', - nargs='+', + "--taskmaster2-domains", + nargs="+", default=DOMAINS, choices=DOMAINS, - help='Uses last passed in configuration.', + help="Uses last passed in configuration.", + ) + parser.add_argument( + "--use-cumulative-api-calls", + type=bool, + default=True, + help="Have API Call/API response turns only when an API response" + "slot exist. Accumulate all API call slots with same API call name", ) - return parser + return super().add_cmdline_args(parser, partial_opt) def __init__(self, opt: Opt, shared=None): - self.fold = opt['datatype'].split(':')[0] - opt['datafile'] = self.fold - self.dpath = os.path.join(opt['datapath'], 'taskmaster-2') + self.fold = DatatypeHelper.fold(opt["datatype"]) + opt["datafile"] = self.fold + self.dpath = os.path.join(opt["datapath"], "taskmaster-2") if shared is None: warn_once( "Taskmaster2 is a beta dataset, and format may significantly change." @@ -74,298 +74,157 @@ def __init__(self, opt: Opt, shared=None): build_.build(opt) super().__init__(opt, shared) - def _h(self, x): - """ - Hash function. - """ - h = int(hashlib.sha1(x.encode('utf-8')).hexdigest(), 16) % 10 - if h == 0: - return 'valid' - elif h == 1: - return 'test' - else: - return 'train' - - def _normalize_annotation(self, anno): - return anno - def _load_data(self, fold, domains): # load up the ontology - ontology = {} + ontologies = {} for section in domains: - parts = [] - fn = os.path.join(self.dpath, section + '.onto.json') - with PathManager.open(fn, 'r') as f: - o = json.load(f) - assert len(o) == 1 - o = list(o.values())[0] - for sub in o: - prefix = sub['prefix'] - parts += [ - self._normalize_annotation(f'{prefix}.{a}') - for a in sub['annotations'] - ] - ontology[section] = ' ; '.join(parts) + fn = os.path.join(self.dpath, section + ".onto.json") + with PathManager.open(fn, "r") as f: + ontologies.update(json.load(f)) chunks = [] for section in domains: - with PathManager.open(os.path.join(self.dpath, section + '.json')) as f: + with PathManager.open(os.path.join(self.dpath, section + ".json")) as f: subset = pd.read_json(f) - subset['domain'] = section + subset["domain"] = section chunks.append(subset) chunks = pd.concat(chunks, axis=0) - # shuffle deterministically for randomness in few-shot training + # deterministic shuffle data for splits chunks = chunks.sample(frac=1.0, random_state=42) - chunks['fold'] = self._label_fold(chunks) - # only the fold we need here - chunks = chunks[chunks.fold == fold].reset_index() - chunks['ontology'] = chunks['domain'].apply(ontology.get) - return chunks - - def _segments2text(self, segments): - output = [] + split_size = len(chunks) // 10 + if fold == "train": + chunks = chunks[: split_size * 8] + elif fold == "valid": + chunks = chunks[split_size * 8 : split_size * 9] + elif fold == "test": + chunks = chunks[split_size * 9 :] + return chunks, ontologies + + def _parse_segment_to_slots(self, segment_list): + result = {} + for segment in segment_list: + slot_name = segment["annotations"][0]["name"] + slot_value = segment["text"] + prefix_split_idx = slot_name.find(".") + api_name = slot_name[:prefix_split_idx] + slot_name = slot_name[prefix_split_idx + 1 :] + result[slot_name] = slot_value + result[tod.STANDARD_API_NAME_SLOT] = api_name + return result + + def _get_utterance_and_api_call_for_speaker(self, speaker, utterances, idx): + utts = [] slots = {} - for segment in segments: - val = segment['text'] - for anno_ in segment['annotations']: - anno = anno_['name'] - anno = self._normalize_annotation(anno) - output.append(f'{anno} = {val}') - slots[anno] = val - return " ; ".join(output), slots - - def custom_evaluation( - self, - teacher_action: Message, - labels: Optional[Tuple[str]], - model_response: Message, - ): - if 'metrics' in model_response and 'type' in teacher_action: - # keep copies of metrics across both api calls/responses - prefix = teacher_action['type'] - keys = list(model_response['metrics'].keys()) - for k in keys: - self.metrics.add(f'{prefix}_{k}', model_response['metrics'][k]) - - if 'text' not in model_response or not labels or 'type' not in teacher_action: - return - - domain = teacher_action['domain'] - - if teacher_action['type'] == 'apicall': - # also count slot accuracy - text = model_response['text'] - slot_guesses = set( - text.replace(CALL_TOKEN + " ", "").split(' ; ') - ) # prevent cheating via repeated guesses - correct = 0 - for slot_guess in slot_guesses: - if ' = ' not in slot_guess: - continue - try: - slot, guess = slot_guess.split(' = ') - except ValueError: - continue - if teacher_action['slots'].get(slot) == guess: - self.metrics.add('slot_p', AverageMetric(1)) - self.metrics.add(f'{domain}_slot_p', AverageMetric(1)) - correct += 1 - else: - self.metrics.add('slot_p', AverageMetric(0)) - self.metrics.add(f'{domain}_slot_p', AverageMetric(0)) - logging.debug( - f"Bad slot guess '{slot_guess}' != {teacher_action['slots']}" - ) - if teacher_action['slots']: - self.metrics.add( - 'slot_r', AverageMetric(correct, len(teacher_action['slots'])) - ) - self.metrics.add( - f'{domain}_slot_r', - AverageMetric(correct, len(teacher_action['slots'])), - ) - self.metrics.add( - 'jga', AverageMetric(correct == len(teacher_action['slots'])) - ) - - elif teacher_action['type'] == 'apiresp': - # keep track of statistics by domain - f1_metric = F1Metric.compute(model_response['text'], labels) - bleu_metric = BleuMetric.compute(model_response['text'], labels) - self.metrics.add(f'{domain}_lex_f1', f1_metric) - self.metrics.add(f'{domain}_lex_bleu', bleu_metric) - - delex_text = model_response['text'] - delex_label = labels[0] - # compute delexicalized string metrics - for slot, value in teacher_action['slots'].items(): - delex_text = delex_text.replace(value, slot) - delex_label = delex_label.replace(value, slot) - f1_metric = F1Metric.compute(delex_text, (delex_label,)) - self.metrics.add('delex_f1', f1_metric) - self.metrics.add(f'{domain}_delex_f1', f1_metric) - bleu_metric = BleuMetric.compute(delex_text, [delex_label]) - self.metrics.add('delex_bleu', bleu_metric) - self.metrics.add(f'{domain}_delex_bleu', bleu_metric) - - def setup_data(self, fold): - domains = self.opt.get('domains', DOMAINS) - chunks = self._load_data(fold, domains) - domains_cnt = Counter() - for _, row in chunks.iterrows(): - domains_cnt[row['domain']] += 1 - first = True - utterances = row['utterances'][:] - if ( - len(utterances) >= 3 - and utterances[0]['speaker'] == 'USER' - and utterances[1]['speaker'] == 'ASSISTANT' - and utterances[2]['speaker'] == 'ASSISTANT' - and "help you?" in utterances[1]['text'] - ): - # skip this one - utterances.pop(1) - if self.opt['include_ontology']: - yield {'text': f"{ONTO_TOKEN} {row['ontology']}", 'label': ''}, True - first = False - while utterances: - utt = utterances.pop(0) - segtxt, slots = self._segments2text(utt.get('segments', [])) - if utt['speaker'] == 'USER': - yield { - 'text': utt['text'], - 'label': f'{CALL_TOKEN} {segtxt}', - 'domain': row['domain'], - 'slots': slots, - 'type': 'apicall', - }, first - first = False - elif utt['speaker'] == 'ASSISTANT': - yield { - 'text': f'{RESP_TOKEN} {segtxt}', - 'label': utt['text'], - 'domain': row['domain'], - 'slots': slots, - 'type': 'apiresp', - }, first - first = False - logging.debug(f"Fold {fold} domains: {domains_cnt}") - - -class DelexTeacher(_Abstract): - def _label_fold(self, chunks): - return chunks.conversation_id.apply(self._h) - - def _delexicalize(self, text, slots): - for key, value in slots.items(): - text = text.replace(value, key) - return text - - def setup_data(self, fold): + while idx < len(utterances): + here = utterances[idx] + if here["speaker"] != speaker: + break + utts.append(here["text"]) + slots.update(self._parse_segment_to_slots(here.get("segments", []))) + idx += 1 + return idx, "\n".join(utts), slots + + def _get_onto_list(self, onto_map, domain): + results = [] + domain = domain.replace( + "-", "_" + ) # cause they changed it for restaurant-search >.> + for data in onto_map[domain]: + call = {} + call[tod.STANDARD_API_NAME_SLOT] = data["prefix"] + call[tod.STANDARD_OPTIONAL_KEY] = data[ + "annotations" + ] # make all args optional since not specified + results.append(call) + return results + + def setup_episodes(self, fold): + """ + Parses into TodStructuredEpisode. + """ + domains = self.opt.get("taskmaster2_domains", DOMAINS) + chunks, ontologies = self._load_data(fold, domains) domains_cnt = Counter() - chunks = self._load_data(fold) + episodes = [] for _, row in chunks.iterrows(): - domains_cnt[row['domain']] += 1 - first = True - utterances = row['utterances'][:] - if ( - len(utterances) >= 3 - and utterances[0]['speaker'] == 'USER' - and utterances[1]['speaker'] == 'ASSISTANT' - and utterances[2]['speaker'] == 'ASSISTANT' - and "help you?" in utterances[1]['text'] - ): - # skip this one - utterances.pop(1) - - user_utterances = [] - asst_utterances = [] - while utterances: - utt = utterances.pop(0) - _, slots = self._segments2text(utt.get('segments', [])) - if utt['speaker'] == 'USER': - if asst_utterances: - yield { - 'text': ' __BREAK__ '.join(user_utterances), - 'label': ' __BREAK__ '.join(asst_utterances), - 'domain': row['domain'], - }, first - first = False - user_utterances = [] - asst_utterances = [] - user_utterances.append(self._delexicalize(utt['text'], slots)) - elif utt['speaker'] == 'ASSISTANT': - asst_utterances.append(self._delexicalize(utt['text'], slots)) - if not user_utterances: - user_utterances.append('__SILENCE__') - if asst_utterances: - yield { - 'text': ' __BREAK__ '.join(user_utterances), - 'label': ' __BREAK__ '.join(asst_utterances), - 'domain': row['domain'], - }, first + domains_cnt[row["domain"]] += 1 + utterances = row["utterances"][:] + + idx = 0 + rounds = [] + goal_calls = [] + if len(utterances) > 0 and utterances[0]["speaker"] == "ASSISTANT": + idx, sys_utt, api_resp = self._get_utterance_and_api_call_for_speaker( + "ASSISTANT", utterances, idx + ) + r = tod.TodStructuredRound(api_resp_machine=api_resp, sys_utt=sys_utt) + rounds.append(r) + cum_api_call = {} + while idx < len(utterances): + idx, user_utt, api_call = self._get_utterance_and_api_call_for_speaker( + "USER", utterances, idx + ) + idx, sys_utt, api_resp = self._get_utterance_and_api_call_for_speaker( + "ASSISTANT", utterances, idx + ) + if not self.opt["use_cumulative_api_calls"]: + r = tod.TodStructuredRound( + user_utt=user_utt, + api_call_machine=api_call, + api_resp_machine=api_resp, + sys_utt=sys_utt, + ) + else: + cum_api_call = self.process_call_for_cumlative_standalone_api( + api_call, cum_api_call + ) + r = tod.TodStructuredRound( + user_utt=user_utt, + api_call_machine=cum_api_call if len(api_resp) > 0 else {}, + api_resp_machine=api_resp if len(api_resp) > 0 else {}, + sys_utt=sys_utt, + ) -class TextOnlyTeacher(DelexTeacher): - def _delexicalize(self, text, slots): - return text + rounds.append(r) + if len(api_call) > 0: + goal_calls.append(api_call) + episode = tod.TodStructuredEpisode( + domain=tod.SerializationHelpers.inner_list_join(row["domain"]), + api_schemas_machine=self._get_onto_list(ontologies, row["domain"]), + goal_calls_machine=goal_calls, + rounds=rounds, + delex=self.opt.get("delex", False), + ) + episodes.append(episode) + return episodes -class FullShotTeacher(_Abstract): - """ - The full shot teacher uses a standard 80-10-10 split, without regarding domain. - """ + def get_id_task_prefix(self): + return "Taskmaster2" def _label_fold(self, chunks): return chunks.conversation_id.apply(self._h) + def process_call_for_cumlative_standalone_api(self, new_call, cum_calls): + if ( + len(new_call) > 0 + and len(cum_calls) > 0 + and new_call[tod.STANDARD_API_NAME_SLOT] + != cum_calls[tod.STANDARD_API_NAME_SLOT] + ): + cum_calls = {} + cum_calls.update(new_call) + return cum_calls -class FewShotTeacher(_Abstract): - """ - Few shot teacher tests for generalization to new domains. - """ - @classmethod - def add_cmdline_args( - cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None - ) -> ParlaiParser: - super().add_cmdline_args(parser, partial_opt) - parser.add_argument( - '--holdout', - default=DOMAINS[0], - choices=DOMAINS, - help='Domain which is held out from test', - ) - parser.add_argument( - '--n-shot', - default=100, - type=int, - help='Number of few shot examples to provide in training fold.', - ) - return super().add_cmdline_args(parser, partial_opt=partial_opt) +class UserSimulatorTeacher(Taskmaster2Parser, tod_agents.TodUserSimulatorTeacher): + pass - def _label_fold(self, chunks): - folds = [] - num_shots = 0 - for _, row in chunks.iterrows(): - if row['domain'] != self.opt['holdout']: - # if it's not in the holdout, always mark it train - folds.append('train') - else: - # keep the same valid/test sets as in fullshot, and only leak - # a small number of the training examples (i.e. throw away the - # vast majority of our data but keep test sets the same) - f = self._h(row['conversation_id']) - if f != 'train': - folds.append(f) - elif num_shots < self.opt['n_shot']: - folds.append('train') - num_shots += 1 - else: - folds.append('throwaway') - return folds +class SystemTeacher(Taskmaster2Parser, tod_agents.TodSystemTeacher): + pass -class DefaultTeacher(FullShotTeacher): +class DefaultTeacher(SystemTeacher): pass diff --git a/parlai/tasks/taskmaster2/build.py b/parlai/tasks/taskmaster2/build.py index 23b7a4845e8..1f71a2ae8ed 100644 --- a/parlai/tasks/taskmaster2/build.py +++ b/parlai/tasks/taskmaster2/build.py @@ -9,93 +9,93 @@ import os from parlai.core.build_data import DownloadableFile -ROOT_URL = 'https://github.com/google-research-datasets/Taskmaster/raw/master/TM-2-2020' +ROOT_URL = "https://github.com/google-research-datasets/Taskmaster/raw/master/TM-2-2020" RESOURCES = [ # raw data files DownloadableFile( - f'{ROOT_URL}/data/flights.json', - 'flights.json', - '86b37b5ae25f530fd18ced78800d30c3b54f7b34bb208ecb51842718f04e760b', + f"{ROOT_URL}/data/flights.json", + "flights.json", + "86b37b5ae25f530fd18ced78800d30c3b54f7b34bb208ecb51842718f04e760b", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/data/food-ordering.json', - 'food-ordering.json', - '0a042e566a816a5d0abebe6f7e8cfd6abaa89729ffc42f433d327df7342b12f8', + f"{ROOT_URL}/data/food-ordering.json", + "food-ordering.json", + "0a042e566a816a5d0abebe6f7e8cfd6abaa89729ffc42f433d327df7342b12f8", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/data/hotels.json', - 'hotels.json', - '975b0242f1e37ea1ab94ccedd7e0d6ee5831599d5df1f16143e71110d6c6006a', + f"{ROOT_URL}/data/hotels.json", + "hotels.json", + "975b0242f1e37ea1ab94ccedd7e0d6ee5831599d5df1f16143e71110d6c6006a", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/data/movies.json', - 'movies.json', - '6f67c9a1f04abc111186e5bcfbe3050be01d0737fd6422901402715bc1f3dd0d', + f"{ROOT_URL}/data/movies.json", + "movies.json", + "6f67c9a1f04abc111186e5bcfbe3050be01d0737fd6422901402715bc1f3dd0d", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/data/music.json', - 'music.json', - 'e5db60d6576fa010bef87a70a8b371d293d48cde8524c1d3ed7c3022f079d95d', + f"{ROOT_URL}/data/music.json", + "music.json", + "e5db60d6576fa010bef87a70a8b371d293d48cde8524c1d3ed7c3022f079d95d", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/data/restaurant-search.json', - 'restaurant-search.json', - 'fb9735f89e7ebc7c877f976da4c30391af6a6277991b597c0755564657ff8f47', + f"{ROOT_URL}/data/restaurant-search.json", + "restaurant-search.json", + "fb9735f89e7ebc7c877f976da4c30391af6a6277991b597c0755564657ff8f47", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/data/sports.json', - 'sports.json', - '8191531bfa5a8426b1508c396ab9886a19c7c620b443c436ec10d8d4708d0eac', + f"{ROOT_URL}/data/sports.json", + "sports.json", + "8191531bfa5a8426b1508c396ab9886a19c7c620b443c436ec10d8d4708d0eac", zipped=False, ), # ontology data files DownloadableFile( - f'{ROOT_URL}/ontology/flights.json', - 'flights.onto.json', - '1ebc5c982339d24b2dcf50677883fed65b7fcb95f01edbbd3be6357090893c33', + f"{ROOT_URL}/ontology/flights.json", + "flights.onto.json", + "1ebc5c982339d24b2dcf50677883fed65b7fcb95f01edbbd3be6357090893c33", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/ontology/food-ordering.json', - 'food-ordering.onto.json', - '79c1189c16f0ab937bad558c70a0b9b99358f9ed91ea65ce4af37c4b7d999063', + f"{ROOT_URL}/ontology/food-ordering.json", + "food-ordering.onto.json", + "79c1189c16f0ab937bad558c70a0b9b99358f9ed91ea65ce4af37c4b7d999063", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/ontology/hotels.json', - 'hotels.onto.json', - '22ae51ba546ee7ca03143097782817c4cdd0de74ac84893eaf40b8254aa866d3', + f"{ROOT_URL}/ontology/hotels.json", + "hotels.onto.json", + "22ae51ba546ee7ca03143097782817c4cdd0de74ac84893eaf40b8254aa866d3", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/ontology/movies.json', - 'movies.onto.json', - '8403283526bb314e871850b98bb86a7987ef0af6fbbe4fb5a089ee9498584476', + f"{ROOT_URL}/ontology/movies.json", + "movies.onto.json", + "8403283526bb314e871850b98bb86a7987ef0af6fbbe4fb5a089ee9498584476", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/ontology/music.json', - 'music.onto.json', - '4bcd6dcf1cdc6bdb717e5fdc08b3472dc3d1f4da8a0f8aee917494d79a7fe338', + f"{ROOT_URL}/ontology/music.json", + "music.onto.json", + "4bcd6dcf1cdc6bdb717e5fdc08b3472dc3d1f4da8a0f8aee917494d79a7fe338", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/ontology/restaurant-search.json', - 'restaurant-search.onto.json', - 'c9ead7985695b3feba1fb955e8407d806e4095f5459485adc5448ae89989e609', + f"{ROOT_URL}/ontology/restaurant-search.json", + "restaurant-search.onto.json", + "c9ead7985695b3feba1fb955e8407d806e4095f5459485adc5448ae89989e609", zipped=False, ), DownloadableFile( - f'{ROOT_URL}/ontology/sports.json', - 'sports.onto.json', - '52f9bbb86ebd9e2b3916185ad4e3e9b8b77d2164d96bd3b98ad67cbaa653757d', + f"{ROOT_URL}/ontology/sports.json", + "sports.onto.json", + "52f9bbb86ebd9e2b3916185ad4e3e9b8b77d2164d96bd3b98ad67cbaa653757d", zipped=False, ), ] @@ -103,13 +103,13 @@ def build(opt): # get path to data directory - dpath = os.path.join(opt['datapath'], 'taskmaster-2') + dpath = os.path.join(opt["datapath"], "taskmaster-2") # define version if any version = "1.1" # check if data had been previously built if not build_data.built(dpath, version_string=version): - print('[building data: ' + dpath + ']') + print("[building data: " + dpath + "]") # make a clean directory if needed if build_data.built(dpath): diff --git a/parlai/tasks/taskmaster2/test.py b/parlai/tasks/taskmaster2/test.py new file mode 100644 index 00000000000..e0d2e79a87c --- /dev/null +++ b/parlai/tasks/taskmaster2/test.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from parlai.utils.testing import AutoTeacherTest + + +class TestDefaultTeacher(AutoTeacherTest): + task = "taskmaster2" + + +class TestUserSimulatorTeacher(AutoTeacherTest): + task = "taskmaster2:UserSimulatorTeacher" diff --git a/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_test.yml b/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_test.yml new file mode 100644 index 00000000000..cabdc76d438 --- /dev/null +++ b/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_test.yml @@ -0,0 +1,46 @@ +acts: +- - domain: sports + episode_done: false + eval_labels: + - 'USER: Hey, what''s the Denver Broncos record?' + id: Taskmaster2_UserSimulatorTeacher + text: 'GOAL: api_name = nfl ; name.team = Denver Broncos' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: What Conference are they in?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: The Denver Broncos are in currently fourth in the AFC West with + a record of four wins and nine losses.' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: Who do they play against next week?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: The Denver Broncos played in the American Football Conference in + the west division.' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: When did they play last?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: In next week the Denver Broncos will be playing against the Indianapolis + Colts.' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: How many games back from first place are they?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Their last game was yesterday, they beat the New York Jets by 23 + to 0.' + type: 'USER: ' +num_episodes: 1734 +num_examples: 17425 diff --git a/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_train.yml b/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_train.yml new file mode 100644 index 00000000000..d4ad22a81c3 --- /dev/null +++ b/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_train.yml @@ -0,0 +1,43 @@ +acts: +- - domain: sports + episode_done: false + id: Taskmaster2_UserSimulatorTeacher + labels: + - 'USER: Hey. How are the Denver Nuggets doing this year?' + text: 'GOAL: api_name = nba ; name.team = Denver Nuggets | api_name = nba ; name.player + = Nikola Jokic' + type: 'USER: ' +- - domain: sports + episode_done: false + id: Taskmaster2_UserSimulatorTeacher + labels: + - 'USER: Okay. And what division are they in?' + slots: {} + text: 'SYSTEM: Hello, They''re currently six place in the Western Conference.' + type: 'USER: ' +- - domain: sports + episode_done: false + id: Taskmaster2_UserSimulatorTeacher + labels: + - 'USER: Okay. And how they did last game?' + slots: {} + text: 'SYSTEM: There in the Northwest division.' + type: 'USER: ' +- - domain: sports + episode_done: false + id: Taskmaster2_UserSimulatorTeacher + labels: + - 'USER: Okay. And I need to start report that.' + slots: {} + text: 'SYSTEM: They lost the last game against the 76ers.' + type: 'USER: ' +- - domain: sports + episode_done: false + id: Taskmaster2_UserSimulatorTeacher + labels: + - 'USER: Okay, And how many points is the college York average in?' + slots: {} + text: 'SYSTEM: Starting point guard is Gary Harris.' + type: 'USER: ' +num_episodes: 13840 +num_examples: 138596 diff --git a/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_valid.yml b/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_valid.yml new file mode 100644 index 00000000000..95ef7136d19 --- /dev/null +++ b/parlai/tasks/taskmaster2/test/taskmaster2_UserSimulatorTeacher_valid.yml @@ -0,0 +1,45 @@ +acts: +- - domain: sports + episode_done: false + eval_labels: + - 'USER: ' + id: Taskmaster2_UserSimulatorTeacher + text: 'GOAL: api_name = mls ; name.team = Vancouver Whitecaps FC? | api_name = + mls ; day.match = last Saturday | api_name = mls ; position.player = goalkeeper' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: Hi Assistant. How are you?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Hello.' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: I''m great. I''m a big fan of Major League Soccer, And my favorite team + is Vancouver Whitecaps FC. And I would love to know what place they are in, + the Vancouver Whitecaps FC?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: I''m good and yourself?' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: Thank you. And are they playing right now?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Currently in 7th place in the Western Conference.' + type: 'USER: ' +- - domain: sports + episode_done: false + eval_labels: + - 'USER: Okay, thank you. And who did they play last Saturday?' + id: Taskmaster2_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: No, they''re not scheduled to play today.' + type: 'USER: ' +num_episodes: 1730 +num_examples: 17337 diff --git a/parlai/tasks/taskmaster2/test/taskmaster2_test.yml b/parlai/tasks/taskmaster2/test/taskmaster2_test.yml new file mode 100644 index 00000000000..eae250af098 --- /dev/null +++ b/parlai/tasks/taskmaster2/test/taskmaster2_test.yml @@ -0,0 +1,55 @@ +acts: +- - domain: sports + episode_done: false + eval_labels: + - 'APIS: ' + id: Taskmaster2_SystemTeacher + slots: {} + text: 'APIS: ' + type: 'APIS: ' +- - domain: sports + episode_done: false + eval_labels: + - 'APICALL: api_name = nfl ; name.team = Denver Broncos' + id: Taskmaster2_SystemTeacher + slots: + api_name: nfl + name.team: Denver Broncos + text: 'USER: Hey, what''s the Denver Broncos record?' + type: 'APICALL: ' +- - domain: sports + episode_done: false + eval_labels: + - 'SYSTEM: The Denver Broncos are in currently fourth in the AFC West with a record + of four wins and nine losses.' + id: Taskmaster2_SystemTeacher + slots: + api_name: nfl + name.team: Denver Broncos + record.team: four wins and nine losses + text: 'APIRESP: api_name = nfl ; name.team = Denver Broncos ; record.team = four + wins and nine losses' + type: 'SYSTEM: ' +- - domain: sports + episode_done: false + eval_labels: + - 'APICALL: api_name = nfl ; name.team = Denver Broncos' + id: Taskmaster2_SystemTeacher + slots: + api_name: nfl + name.team: Denver Broncos + text: 'USER: What Conference are they in?' + type: 'APICALL: ' +- - domain: sports + episode_done: false + eval_labels: + - 'SYSTEM: The Denver Broncos played in the American Football Conference in the + west division.' + id: Taskmaster2_SystemTeacher + slots: + api_name: nfl + name.team: Denver Broncos + text: 'APIRESP: api_name = nfl ; name.team = Denver Broncos' + type: 'SYSTEM: ' +num_episodes: 1734 +num_examples: 36584 diff --git a/parlai/tasks/taskmaster2/test/taskmaster2_train.yml b/parlai/tasks/taskmaster2/test/taskmaster2_train.yml new file mode 100644 index 00000000000..5676f053795 --- /dev/null +++ b/parlai/tasks/taskmaster2/test/taskmaster2_train.yml @@ -0,0 +1,48 @@ +acts: +- - domain: sports + episode_done: false + id: Taskmaster2_SystemTeacher + labels: + - 'APIS: ' + slots: {} + text: 'APIS: ' + type: 'APIS: ' +- - domain: sports + episode_done: false + id: Taskmaster2_SystemTeacher + labels: + - 'APICALL: api_name = nba ; name.team = Denver Nuggets' + slots: + api_name: nba + name.player: Nikola Jokic + name.team: Denver Nuggets + text: 'USER: Hey. How are the Denver Nuggets doing this year?' + type: 'APICALL: ' +- - domain: sports + episode_done: false + id: Taskmaster2_SystemTeacher + labels: + - 'SYSTEM: Hello, They''re currently six place in the Western Conference.' + slots: + api_name: nba + place.team: six place + text: 'APIRESP: api_name = nba ; place.team = six place' + type: 'SYSTEM: ' +- - domain: sports + episode_done: false + id: Taskmaster2_SystemTeacher + labels: + - 'APICALL: ' + slots: {} + text: 'USER: Okay. And what division are they in?' + type: 'APICALL: ' +- - domain: sports + episode_done: false + id: Taskmaster2_SystemTeacher + labels: + - 'SYSTEM: There in the Northwest division.' + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' +num_episodes: 13840 +num_examples: 291032 diff --git a/parlai/tasks/taskmaster2/test/taskmaster2_valid.yml b/parlai/tasks/taskmaster2/test/taskmaster2_valid.yml new file mode 100644 index 00000000000..3a1c36b2bdb --- /dev/null +++ b/parlai/tasks/taskmaster2/test/taskmaster2_valid.yml @@ -0,0 +1,43 @@ +acts: +- - domain: sports + episode_done: false + eval_labels: + - 'APIS: ' + id: Taskmaster2_SystemTeacher + slots: {} + text: 'APIS: ' + type: 'APIS: ' +- - domain: sports + episode_done: false + eval_labels: + - 'APICALL: ' + id: Taskmaster2_SystemTeacher + slots: {} + text: 'USER: ' + type: 'APICALL: ' +- - domain: sports + episode_done: false + eval_labels: + - 'SYSTEM: Hello.' + id: Taskmaster2_SystemTeacher + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' +- - domain: sports + episode_done: false + eval_labels: + - 'APICALL: ' + id: Taskmaster2_SystemTeacher + slots: {} + text: 'USER: Hi Assistant. How are you?' + type: 'APICALL: ' +- - domain: sports + episode_done: false + eval_labels: + - 'SYSTEM: I''m good and yourself?' + id: Taskmaster2_SystemTeacher + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' +num_episodes: 1730 +num_examples: 36404