From 5f94b6f62bf73be86cc8e3b5fff961b05926b1b1 Mon Sep 17 00:00:00 2001 From: Moya Chen <72097364+moyapchen@users.noreply.github.com> Date: Wed, 22 Dec 2021 12:08:06 -0600 Subject: [PATCH] [TOD][Dataset][Easy] Google SGD in TOD Conversations format (#4181) --- parlai/tasks/google_sgd/agents.py | 354 ++++++++++-------- parlai/tasks/google_sgd/build.py | 16 +- parlai/tasks/google_sgd/test.py | 10 +- .../google_sgd_UserSimulatorTeacher_test.yml | 51 +++ .../google_sgd_UserSimulatorTeacher_train.yml | 49 +++ .../google_sgd_UserSimulatorTeacher_valid.yml | 46 +++ .../tasks/google_sgd/test/google_sgd_test.yml | 108 ++---- .../google_sgd/test/google_sgd_train.yml | 85 ++--- .../google_sgd/test/google_sgd_valid.yml | 101 ++--- 9 files changed, 466 insertions(+), 354 deletions(-) create mode 100644 parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_test.yml create mode 100644 parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_train.yml create mode 100644 parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_valid.yml diff --git a/parlai/tasks/google_sgd/agents.py b/parlai/tasks/google_sgd/agents.py index 12e55a5deff..bfcdf02b3dc 100644 --- a/parlai/tasks/google_sgd/agents.py +++ b/parlai/tasks/google_sgd/agents.py @@ -8,192 +8,228 @@ Google The Schema-Guided Dialogue(SGD) Dataset implementation for ParlAI. """ -import os +import glob import json -from parlai.core.opt import Opt -from parlai.core.teachers import DialogTeacher -from parlai.utils.misc import warn_once -from parlai.core.message import Message -from parlai.core.metrics import AverageMetric, BleuMetric -from parlai.utils.io import PathManager +import os +from typing import Optional import parlai.tasks.google_sgd.build as build_ +import parlai.core.tod.tod_core as tod +import parlai.core.tod.tod_agents as tod_agents +from parlai.core.tod.tod_core import SerializationHelpers +from parlai.core.params import ParlaiParser +from parlai.core.opt import Opt +from parlai.utils.io import PathManager -class Text2API2TextTeacher(DialogTeacher): - """ - Teacher which produces both API calls and NLG responses. - """ +class GoogleSGDParser(tod_agents.TodStructuredDataParser): + @classmethod + def add_cmdline_args( + cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None + ) -> ParlaiParser: + parser = super().add_cmdline_args(parser, partial_opt) + parser.add_argument( + "--delex", type="bool", default=False, help="Delexicalize labels" + ) + parser.add_argument( + "--filter-dialogue-by-id", + default="", + type=str, + help="Path to a json file of `dialogue_id`s for which we will filter from. Assumes it will contain a map where the keys are a fold and the value is a list of ids", + ) + return parser def __init__(self, opt: Opt, shared=None): - self.fold = opt['datatype'].split(':')[0] - opt['datafile'] = self.fold - self.dpath = os.path.join(opt['datapath'], 'google_sgd') + self.fold = self.get_fold(opt) + opt["datafile"] = self.fold + self.dpath = os.path.join(opt["datapath"], "google_sgd") if shared is None: - warn_once( - "Google SGD is a beta dataset, and format may significantly change." - ) + # full initialize the teacher as this is not a clone build_.build(opt) super().__init__(opt, shared) + def get_fold(self, opt): + return opt["datatype"].split(":")[0] + def _load_data(self, fold): - dataset_fold = 'dev' if fold == 'valid' else fold + dataset_fold = "dev" if fold == "valid" else fold fold_path = os.path.join(self.dpath, dataset_fold) - schema_file = os.path.join(fold_path, 'schema.json') - with PathManager.open(schema_file, 'r') as f: + schema_file = os.path.join(fold_path, "schema.json") + with PathManager.open(schema_file, "r") as f: schema_lookup = {} for schema in json.load(f): - schema_lookup[schema['service_name']] = schema - - dialogs = [] - for file_id in range(1, build_.fold_size(dataset_fold) + 1): - filename = os.path.join(fold_path, f'dialogues_{file_id:03d}.json') - with PathManager.open(filename, 'r') as f: - dialogs += json.load(f) - return schema_lookup, dialogs - - def _get_api_call_and_results(self, sys_turn, schema_lookup): + schema_lookup[schema["service_name"]] = schema + + dialogues = [] + for filename in glob.glob(f"{fold_path}/dialogues*.json"): + with PathManager.open(filename, "r") as f: + dialogues += json.load(f) + + filter_path = self.opt.get("filter_dialogue_by_id", "") + if len(filter_path) > 0: + filtered = [] + with open(filter_path) as f: + dialogues_to_get = json.load(f)[fold] + for dialogue in dialogues: + if dialogue["dialogue_id"] in dialogues_to_get: + filtered.append(dialogue) + assert len(filtered) == len( + dialogues_to_get + ), f"Different number of dialogues found than requested. Are you sure you've got the right form of Google SGD? Did you filter for dialogue ids correctly? len(filtered) = {len(filtered)}, len(dialogues_to_get) = {len(dialogues_to_get)}" + dialogues = filtered + return schema_lookup, dialogues + + def _get_api_call_and_results(self, sys_turn): api_call = {} api_resp = {} - for frame in sys_turn['frames']: - if 'service_call' in frame: + for frame in sys_turn["frames"]: + if "service_call" in frame: # API CALL - method = frame['service_call']['method'] - for slot_type, slot_value in frame['service_call'][ - 'parameters' + for slot_type, slot_value in frame["service_call"][ + "parameters" ].items(): - api_call[f'{method}.{slot_type}'] = slot_value - assert 'service_results' in frame + if slot_value: + api_call[ + f"{slot_type.strip()}" + ] = SerializationHelpers.inner_list_join(slot_value) + api_call[tod.STANDARD_API_NAME_SLOT] = frame["service_call"]["method"] + assert "service_results" in frame # API Resp - if 'actions' in frame: - for action in frame['actions']: - slot_type = action['slot'] - slot_value = action['canonical_values'] - api_resp[slot_type] = slot_value + if "service_results" in frame: + api_resp = {} + service_results = frame["service_results"] + if len(service_results) > 0: + for key, value in service_results[0].items(): + api_resp[key] = SerializationHelpers.inner_list_join(value) return api_call, api_resp - def custom_evaluation( - self, teacher_action: Message, labels, model_response: Message - ): - resp = model_response.get('text') - if not resp: - return - - if teacher_action['type'] == 'apicall' and resp.startswith('apicall: '): - gold = teacher_action['slots'] - slot_strs = resp[9:].split(' ; ') - parsed = {} - for slot_str in slot_strs: - if ' = ' not in slot_str: - if slot_str != '': - # syntactically invalid generations should count against us - self.metrics.add('slot_p', AverageMetric(0)) - continue - name, value = slot_str.split(' = ') - parsed[name] = value - - # slot precision - for k, v in parsed.items(): - self.metrics.add('slot_p', AverageMetric(v == gold.get(k))) - # slot recall - for k, v in gold.items(): - self.metrics.add('slot_r', AverageMetric(v == parsed.get(k))) - elif teacher_action['type'] == 'apiresp': - delex_resp = self._delex(resp, teacher_action['slots']) - delex_label = self._delex(labels[0], teacher_action['slots']) - self.metrics.add( - 'delex_bleu', BleuMetric.compute(delex_resp, [delex_label]) - ) - - def _delex(self, text, slots): - delex = text - for slot, values in slots.items(): - assert isinstance(values, list) - for value in values: - delex = delex.replace(value, slot) - return delex - - def _api_dict_to_str(self, apidict): - return ' ; '.join(f'{k} = {v}' for k, v in apidict.items()) - - def setup_data(self, fold): - schema_lookup, dialogs = self._load_data(fold) - for dialog in dialogs: - # services = dialog['services'] - turns = dialog['turns'] - num_turns = len(turns) - for turn_id in range(0, num_turns, 2): - is_first_turn = turn_id == 0 - + def _get_apis_in_domain(self, schema, domain): + """ + Google SGD includes extra information with the call, so remove these. + """ + result = {} + for intent in schema[domain].get("intents", {}): + here = {} + if "required_slots" in intent and len(intent["required_slots"]) > 0: + here[tod.STANDARD_REQUIRED_KEY] = intent["required_slots"] + if "optional_slots" in intent and len(intent["optional_slots"]) > 0: + here[tod.STANDARD_OPTIONAL_KEY] = intent["optional_slots"] + if "result_slots" in intent: + here["results"] = intent["result_slots"] + result[intent["name"]] = here + return result + + def _get_intent_groundinging(self, schema, domains): + """ + Returns map where keys are intents and values are names of required/optional + slots. + + We do not care about `result_slots` or default values of optional slots. + """ + result = [] + for domain in domains: + apis = self._get_apis_in_domain(schema, domain) + for intent, params in apis.items(): + here = {} + here[tod.STANDARD_API_NAME_SLOT] = intent + if tod.STANDARD_REQUIRED_KEY in params: + here[tod.STANDARD_REQUIRED_KEY] = params[tod.STANDARD_REQUIRED_KEY] + if ( + tod.STANDARD_OPTIONAL_KEY in params + and len(params[tod.STANDARD_OPTIONAL_KEY]) > 0 + ): + here[tod.STANDARD_OPTIONAL_KEY] = params[ + tod.STANDARD_OPTIONAL_KEY + ].keys() + result.append(here) + return result + + def _get_all_service_calls(self, turns): + """ + Searches through all turns in a dialogue for any service calls, returns these. + """ + results = [] + for turn in turns: + for frame in turn["frames"]: + if "service_call" in frame: + call = frame["service_call"] + item = call["parameters"] + item[tod.STANDARD_API_NAME_SLOT] = call["method"] + results.append(item) + return results + + def setup_episodes(self, fold): + """ + Parses Google SGD episodes into TodStructuredEpisode. + """ + schema_lookup, dialogues = self._load_data(fold) + result = [] + for dialogue in dialogues: + domains = {s.split("_")[0].strip() for s in dialogue["services"]} + turns = dialogue["turns"] + rounds = [] + for turn_id in range(0, len(turns), 2): user_turn = turns[turn_id] sys_turn = turns[turn_id + 1] - api_call, api_results = self._get_api_call_and_results( - sys_turn, schema_lookup + api_call, api_results = self._get_api_call_and_results(sys_turn) + r = tod.TodStructuredRound( + user_utt=user_turn["utterance"], + api_call_machine=api_call, + api_resp_machine=api_results, + sys_utt=sys_turn["utterance"], ) - call_str = self._api_dict_to_str(api_call) - resp_str = self._api_dict_to_str(api_results) - if not api_call and not api_results: - # input: user_turn, output: sys_turn - yield { - 'text': user_turn['utterance'], - 'label': sys_turn['utterance'], - 'type': 'text', - }, is_first_turn - elif not api_call and api_results: - yield { - 'text': f"{user_turn['utterance']} api_resp: {resp_str}", - 'label': sys_turn['utterance'], - 'type': 'apiresp', - 'slots': api_results, - }, is_first_turn - elif api_call and api_results: - # input: user_turn, output: api_call - yield { - 'text': user_turn['utterance'], - 'label': f'apicall: {call_str}', - 'type': 'apicall', - 'slots': api_call, - }, is_first_turn - - # system turn, input : api results, output : assistant turn - yield { - 'text': f"api_resp: {resp_str}", - 'label': sys_turn['utterance'], - 'type': 'apiresp', - 'slots': api_results, - }, False - else: - assert ( - api_call and api_results - ), "API call without API results! Check Dataset!" - - -class Text2TextTeacher(Text2API2TextTeacher): - """ - Text-only teacher (with no API calls or slots) - """ - - def setup_data(self, fold): - schema_lookup, dialogs = self._load_data(fold) - for dialog in dialogs: - turns = dialog['turns'] - num_turns = len(turns) - for turn_id in range(0, num_turns, 2): - if turn_id == 0: - is_first_turn = True - else: - is_first_turn = False + rounds.append(r) + # Now that we've got the rounds, make the episode + episode = tod.TodStructuredEpisode( + domain=SerializationHelpers.inner_list_join(domains), + api_schemas_machine=self._get_intent_groundinging( + schema_lookup, set(dialogue["services"]) + ), + goal_calls_machine=self._get_all_service_calls(turns), + rounds=rounds, + delex=self.opt.get("delex"), + extras={"dialogue_id": dialogue["dialogue_id"]}, + ) + result.append(episode) + # check if the number of episodes should be limited and truncate as required + return result - user_turn = turns[turn_id] - sys_turn = turns[turn_id + 1] - # input: user_turn, output: sys_turn - yield { - 'text': user_turn['utterance'], - 'label': sys_turn['utterance'], - 'type': 'text', - }, is_first_turn + def get_id_task_prefix(self): + return "GoogleSGD" + + +class SystemTeacher(GoogleSGDParser, tod_agents.TodSystemTeacher): + pass + + +class DefaultTeacher(SystemTeacher): + pass + + +class UserSimulatorTeacher(GoogleSGDParser, tod_agents.TodUserSimulatorTeacher): + pass + + +class StandaloneApiTeacher(GoogleSGDParser, tod_agents.TodStandaloneApiTeacher): + pass + + +class SingleGoalAgent(GoogleSGDParser, tod_agents.TodSingleGoalAgent): + pass + + +class GoalAgent(GoogleSGDParser, tod_agents.TodGoalAgent): + pass + + +class ApiSchemaAgent(GoogleSGDParser, tod_agents.TodApiSchemaAgent): + pass + + +class UserUttAgent(GoogleSGDParser, tod_agents.TodUserUttAgent): + pass -class DefaultTeacher(Text2API2TextTeacher): +class ApiCallAndSysUttAgent(GoogleSGDParser, tod_agents.TodApiCallAndSysUttAgent): pass diff --git a/parlai/tasks/google_sgd/build.py b/parlai/tasks/google_sgd/build.py index d0432ed1241..d0bf47adce2 100644 --- a/parlai/tasks/google_sgd/build.py +++ b/parlai/tasks/google_sgd/build.py @@ -8,7 +8,7 @@ import parlai.core.build_data as build_data import os -ROOT_URL = 'https://github.com/google-research-datasets/dstc8-schema-guided-dialogue/raw/master' +ROOT_URL = "https://github.com/google-research-datasets/dstc8-schema-guided-dialogue/raw/master" DATA_LEN = {"train": 127, "dev": 20, "test": 34} @@ -18,13 +18,13 @@ def fold_size(fold): def build(opt): # get path to data directory - dpath = os.path.join(opt['datapath'], 'google_sgd') + dpath = os.path.join(opt["datapath"], "google_sgd") # define version if any version = "1.0" # check if data had been previously built if not build_data.built(dpath, version_string=version): - print('[building data: ' + dpath + ']') + print("[building data: " + dpath + "]") # make a clean directory if needed if build_data.built(dpath): @@ -33,16 +33,16 @@ def build(opt): build_data.make_dir(dpath) # Download the data. - for split_type in ['train', 'dev', 'test']: + for split_type in ["train", "dev", "test"]: outpath = os.path.join(dpath, split_type) - filename = 'schema.json' - url = f'{ROOT_URL}/{split_type}/{filename}' + filename = "schema.json" + url = f"{ROOT_URL}/{split_type}/{filename}" build_data.make_dir(outpath) build_data.download(url, outpath, filename) for file_id in range(1, DATA_LEN[split_type] + 1): - filename = f'dialogues_{file_id:03d}.json' - url = f'{ROOT_URL}/{split_type}/{filename}' + filename = f"dialogues_{file_id:03d}.json" + url = f"{ROOT_URL}/{split_type}/{filename}" build_data.download(url, outpath, filename) # mark the data as built diff --git a/parlai/tasks/google_sgd/test.py b/parlai/tasks/google_sgd/test.py index e348c087c61..5ea8078f74e 100644 --- a/parlai/tasks/google_sgd/test.py +++ b/parlai/tasks/google_sgd/test.py @@ -7,13 +7,9 @@ from parlai.utils.testing import AutoTeacherTest -class DisabledTestDefaultTeacher(AutoTeacherTest): +class TestDefaultTeacher(AutoTeacherTest): task = "google_sgd" -class DisabledTestText2API2TextTeacher(AutoTeacherTest): - task = "google_sgd:text2_a_p_i2_text" - - -class DisabledTestText2TextTeacher(AutoTeacherTest): - task = "google_sgd:text2_text" +class TestUserSimulatorTeacher(AutoTeacherTest): + task = "google_sgd:UserSimulatorTeacher" diff --git a/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_test.yml b/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_test.yml new file mode 100644 index 00000000000..8ea369e9cad --- /dev/null +++ b/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_test.yml @@ -0,0 +1,51 @@ +acts: +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Hi, could you get me a restaurant booking on the 8th please?' + id: GoogleSGD_UserSimulatorTeacher + text: 'GOAL: api_name = ReserveRestaurant ; date = 2019-03-08 ; location = Corte + Madera ; number_of_seats = 2 ; restaurant_name = P.f. Chang''s ; time = 12:00 + | api_name = ReserveRestaurant ; date = 2019-03-08 ; location = Corte Madera + ; number_of_seats = 2 ; restaurant_name = Benissimo Restaurant & Bar ; time + = 12:00' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Could you get me a reservation at P.f. Chang''s in Corte Madera at afternoon + 12?' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Any preference on the restaurant, location and time?' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Sure, that is great.' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Please confirm your reservation at P.f. Chang''s in Corte Madera + at 12 pm for 2 on March 8th.' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Could you try booking a table at Benissimo instead?' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Sorry, your reservation could not be made. Could I help you with + something else?' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Sure, may I know if they have vegetarian options and how expensive is + their food?' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Sure, please confirm your reservation at Benissimo Restaurant & + Bar in Corte Madera at 12 pm for 2 on March 8th.' + type: 'USER: ' +num_episodes: 4201 +num_examples: 46498 diff --git a/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_train.yml b/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_train.yml new file mode 100644 index 00000000000..d4ea92d7d1b --- /dev/null +++ b/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_train.yml @@ -0,0 +1,49 @@ +acts: +- - domain: Restaurants + episode_done: false + id: GoogleSGD_UserSimulatorTeacher + labels: + - 'USER: I am feeling hungry so I would like to find a place to eat.' + text: 'GOAL: api_name = FindRestaurants ; city = San Jose ; cuisine = American + | api_name = FindRestaurants ; city = Palo Alto ; cuisine = American ; price_range + = moderate | api_name = ReserveRestaurant ; city = Palo Alto ; date = 2019-03-01 + ; party_size = 2 ; restaurant_name = Bird Dog ; time = 11:30' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_UserSimulatorTeacher + labels: + - 'USER: I would like for it to be in San Jose.' + slots: {} + text: 'SYSTEM: Do you have a specific which you want the eating place to be located + at?' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_UserSimulatorTeacher + labels: + - 'USER: I usually like eating the American type of food.' + slots: {} + text: 'SYSTEM: Is there a specific cuisine type you enjoy, such as Mexican, Italian + or something else?' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_UserSimulatorTeacher + labels: + - 'USER: Can you give me the address of this restaurant.' + slots: {} + text: 'SYSTEM: I see that at 71 Saint Peter there is a good restaurant which is + in San Jose.' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_UserSimulatorTeacher + labels: + - 'USER: Can you give me the phone number that I can contact them with?' + slots: {} + text: 'SYSTEM: If you want to go to this restaurant you can find it at 71 North + San Pedro Street.' + type: 'USER: ' +num_episodes: 16142 +num_examples: 181124 diff --git a/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_valid.yml b/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_valid.yml new file mode 100644 index 00000000000..59d81a949c9 --- /dev/null +++ b/parlai/tasks/google_sgd/test/google_sgd_UserSimulatorTeacher_valid.yml @@ -0,0 +1,46 @@ +acts: +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: I want to make a restaurant reservation for 2 people at half past 11 + in the morning.' + id: GoogleSGD_UserSimulatorTeacher + text: 'GOAL: api_name = ReserveRestaurant ; date = 2019-03-01 ; location = San + Jose ; number_of_seats = 2 ; restaurant_name = Sino ; time = 11:30' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Please find restaurants in San Jose. Can you try Sino?' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: What city do you want to dine in? Do you have a preferred restaurant?' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Yes, thanks. What''s their phone number?' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Confirming: I will reserve a table for 2 people at Sino in San + Jose. The reservation time is 11:30 am today.' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: What''s their address? Do they have vegetarian options on their menu?' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: Your reservation has been made. Their phone number is 408-247-8880.' + type: 'USER: ' +- - domain: Restaurants + episode_done: false + eval_labels: + - 'USER: Thanks very much.' + id: GoogleSGD_UserSimulatorTeacher + slots: {} + text: 'SYSTEM: The street address is 377 Santana Row #1000. They have good vegetarian + options.' + type: 'USER: ' +num_episodes: 2482 +num_examples: 26845 diff --git a/parlai/tasks/google_sgd/test/google_sgd_test.yml b/parlai/tasks/google_sgd/test/google_sgd_test.yml index 1d66bb9f0bb..d55849a814b 100644 --- a/parlai/tasks/google_sgd/test/google_sgd_test.yml +++ b/parlai/tasks/google_sgd/test/google_sgd_test.yml @@ -1,77 +1,45 @@ acts: -- - episode_done: false +- - domain: Restaurants + episode_done: false eval_labels: - - Any preference on the restaurant, location and time? - id: google_sgd - slots: - location: [] - restaurant_name: [] - time: [] - text: 'Hi, could you get me a restaurant booking on the 8th please? api_resp: - time = [] ; restaurant_name = [] ; location = []' - type: apiresp -- - episode_done: false + - 'APIS: ' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'APIS: ' + type: 'APIS: ' +- - domain: Restaurants + episode_done: false eval_labels: - - Please confirm your reservation at P.f. Chang's in Corte Madera at 12 pm for - 2 on March 8th. - id: google_sgd - slots: - date: - - '2019-03-08' - location: - - Corte Madera - number_of_seats: - - '2' - restaurant_name: - - P.f. Chang's - time: - - '12:00' - text: 'Could you get me a reservation at P.f. Chang''s in Corte Madera at afternoon - 12? api_resp: restaurant_name = ["P.f. Chang''s"] ; location = [''Corte Madera''] - ; time = [''12:00''] ; date = [''2019-03-08''] ; number_of_seats = [''2'']' - type: apiresp -- - episode_done: false + - 'APICALL: ' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'USER: Hi, could you get me a restaurant booking on the 8th please?' + type: 'APICALL: ' +- - domain: Restaurants + episode_done: false eval_labels: - - 'apicall: ReserveRestaurant.date = 2019-03-08 ; ReserveRestaurant.location = - Corte Madera ; ReserveRestaurant.number_of_seats = 2 ; ReserveRestaurant.restaurant_name - = P.f. Chang''s ; ReserveRestaurant.time = 12:00' - id: google_sgd - slots: - ReserveRestaurant.date: '2019-03-08' - ReserveRestaurant.location: Corte Madera - ReserveRestaurant.number_of_seats: '2' - ReserveRestaurant.restaurant_name: P.f. Chang's - ReserveRestaurant.time: '12:00' - text: Sure, that is great. - type: apicall -- - episode_done: false + - 'SYSTEM: Any preference on the restaurant, location and time?' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' +- - domain: Restaurants + episode_done: false eval_labels: - - Sorry, your reservation could not be made. Could I help you with something else? - id: google_sgd - slots: - ? '' - : [] - text: 'api_resp: = []' - type: apiresp -- - episode_done: false + - 'APICALL: ' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'USER: Could you get me a reservation at P.f. Chang''s in Corte Madera at + afternoon 12?' + type: 'APICALL: ' +- - domain: Restaurants + episode_done: false eval_labels: - - Sure, please confirm your reservation at Benissimo Restaurant & Bar in Corte - Madera at 12 pm for 2 on March 8th. - id: google_sgd - slots: - date: - - '2019-03-08' - location: - - Corte Madera - number_of_seats: - - '2' - restaurant_name: - - Benissimo Restaurant & Bar - time: - - '12:00' - text: 'Could you try booking a table at Benissimo instead? api_resp: restaurant_name - = [''Benissimo Restaurant & Bar''] ; location = [''Corte Madera''] ; time = - [''12:00''] ; date = [''2019-03-08''] ; number_of_seats = [''2'']' - type: apiresp + - 'SYSTEM: Please confirm your reservation at P.f. Chang''s in Corte Madera at + 12 pm for 2 on March 8th.' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' num_episodes: 4201 -num_examples: 54970 +num_examples: 92996 diff --git a/parlai/tasks/google_sgd/test/google_sgd_train.yml b/parlai/tasks/google_sgd/test/google_sgd_train.yml index cd2b3a48e30..584b6a56561 100644 --- a/parlai/tasks/google_sgd/test/google_sgd_train.yml +++ b/parlai/tasks/google_sgd/test/google_sgd_train.yml @@ -1,54 +1,45 @@ acts: -- - episode_done: false - id: google_sgd +- - domain: Restaurants + episode_done: false + id: GoogleSGD_SystemTeacher labels: - - Do you have a specific which you want the eating place to be located at? - slots: - city: [] - text: 'I am feeling hungry so I would like to find a place to eat. api_resp: city - = []' - type: apiresp -- - episode_done: false - id: google_sgd + - 'APIS: ' + slots: {} + text: 'APIS: ' + type: 'APIS: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_SystemTeacher labels: - - Is there a specific cuisine type you enjoy, such as Mexican, Italian or something - else? - slots: - cuisine: - - Mexican - - Italian - text: 'I would like for it to be in San Jose. api_resp: cuisine = [''Mexican'', - ''Italian'']' - type: apiresp -- - episode_done: false - id: google_sgd + - 'APICALL: ' + slots: {} + text: 'USER: I am feeling hungry so I would like to find a place to eat.' + type: 'APICALL: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_SystemTeacher labels: - - 'apicall: FindRestaurants.city = San Jose ; FindRestaurants.cuisine = American' - slots: - FindRestaurants.city: San Jose - FindRestaurants.cuisine: American - text: I usually like eating the American type of food. - type: apicall -- - episode_done: false - id: google_sgd + - 'SYSTEM: Do you have a specific which you want the eating place to be located + at?' + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_SystemTeacher labels: - - I see that at 71 Saint Peter there is a good restaurant which is in San Jose. - slots: - city: - - San Jose - restaurant_name: - - 71 Saint Peter - text: 'api_resp: restaurant_name = [''71 Saint Peter''] ; city = [''San Jose'']' - type: apiresp -- - episode_done: false - id: google_sgd + - 'APICALL: ' + slots: {} + text: 'USER: I would like for it to be in San Jose.' + type: 'APICALL: ' +- - domain: Restaurants + episode_done: false + id: GoogleSGD_SystemTeacher labels: - - If you want to go to this restaurant you can find it at 71 North San Pedro Street. - slots: - street_address: - - 71 North San Pedro Street - text: 'Can you give me the address of this restaurant. api_resp: street_address - = [''71 North San Pedro Street'']' - type: apiresp + - 'SYSTEM: Is there a specific cuisine type you enjoy, such as Mexican, Italian + or something else?' + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' num_episodes: 16142 -num_examples: 215128 +num_examples: 362248 diff --git a/parlai/tasks/google_sgd/test/google_sgd_valid.yml b/parlai/tasks/google_sgd/test/google_sgd_valid.yml index 8c95cc85c06..3d58a2b919d 100644 --- a/parlai/tasks/google_sgd/test/google_sgd_valid.yml +++ b/parlai/tasks/google_sgd/test/google_sgd_valid.yml @@ -1,70 +1,45 @@ acts: -- - episode_done: false +- - domain: Restaurants + episode_done: false eval_labels: - - What city do you want to dine in? Do you have a preferred restaurant? - id: google_sgd - slots: - location: [] - restaurant_name: [] - text: 'I want to make a restaurant reservation for 2 people at half past 11 in - the morning. api_resp: restaurant_name = [] ; location = []' - type: apiresp -- - episode_done: false + - 'APIS: ' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'APIS: ' + type: 'APIS: ' +- - domain: Restaurants + episode_done: false eval_labels: - - 'Confirming: I will reserve a table for 2 people at Sino in San Jose. The reservation - time is 11:30 am today.' - id: google_sgd - slots: - date: - - '2019-03-01' - location: - - San Jose - number_of_seats: - - '2' - restaurant_name: - - Sino - time: - - '11:30' - text: 'Please find restaurants in San Jose. Can you try Sino? api_resp: restaurant_name - = [''Sino''] ; location = [''San Jose''] ; time = [''11:30''] ; number_of_seats - = [''2''] ; date = [''2019-03-01'']' - type: apiresp -- - episode_done: false + - 'APICALL: ' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'USER: I want to make a restaurant reservation for 2 people at half past + 11 in the morning.' + type: 'APICALL: ' +- - domain: Restaurants + episode_done: false eval_labels: - - 'apicall: ReserveRestaurant.date = 2019-03-01 ; ReserveRestaurant.location = - San Jose ; ReserveRestaurant.number_of_seats = 2 ; ReserveRestaurant.restaurant_name - = Sino ; ReserveRestaurant.time = 11:30' - id: google_sgd - slots: - ReserveRestaurant.date: '2019-03-01' - ReserveRestaurant.location: San Jose - ReserveRestaurant.number_of_seats: '2' - ReserveRestaurant.restaurant_name: Sino - ReserveRestaurant.time: '11:30' - text: Yes, thanks. What's their phone number? - type: apicall -- - episode_done: false + - 'SYSTEM: What city do you want to dine in? Do you have a preferred restaurant?' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' +- - domain: Restaurants + episode_done: false eval_labels: - - Your reservation has been made. Their phone number is 408-247-8880. - id: google_sgd - slots: - ? '' - : [] - phone_number: - - 408-247-8880 - text: 'api_resp: phone_number = [''408-247-8880''] ; = []' - type: apiresp -- - episode_done: false + - 'APICALL: ' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'USER: Please find restaurants in San Jose. Can you try Sino?' + type: 'APICALL: ' +- - domain: Restaurants + episode_done: false eval_labels: - - 'The street address is 377 Santana Row #1000. They have good vegetarian options.' - id: google_sgd - slots: - address: - - '377 Santana Row #1000' - has_vegetarian_options: - - 'True' - text: 'What''s their address? Do they have vegetarian options on their menu? api_resp: - has_vegetarian_options = [''True''] ; address = [''377 Santana Row #1000'']' - type: apiresp + - 'SYSTEM: Confirming: I will reserve a table for 2 people at Sino in San Jose. + The reservation time is 11:30 am today.' + id: GoogleSGD_SystemTeacher + slots: {} + text: 'APIRESP: ' + type: 'SYSTEM: ' num_episodes: 2482 -num_examples: 31825 +num_examples: 53690