[TOD][Datasets][Easy] Multidogo -> TOD Conversations format

See title. Only the System and UserSimulator teachers are included here, since that is all we currently need from this dataset. Datasets added in this substack: * Google SGD * Google SGD Simulation Splits (In-domain, Out-domain) * MetalWoz * **MSR_E2E** * Multidogo * MultiWoz V2.2 * Taskmaster * Taskmaster2 * Taskmaster3 (TicketTalk) Test plan: regression test, plus `parlai dd` on the dataset.
facebookresearch · moyapchen · Mar 24, 2022 · Nov 15, 2021 · Nov 16, 2021 · Nov 16, 2021
commit 61f704112457257a3cb22604e11d7b739d3be74c
diff --git a/parlai/tasks/multidogo/__init__.py b/parlai/tasks/multidogo/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/parlai/tasks/multidogo/agents.py b/parlai/tasks/multidogo/agents.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+MultiDoGo implementation for ParlAI.
+
+NOTE: There is still missing data in the open source version of this; implementation is not complete. See https://github.com/awslabs/multi-domain-goal-oriented-dialogues-dataset/issues/1
+"""
+
+from typing import Optional
+from parlai.core.params import ParlaiParser
+import copy
+import json
+import os
+from parlai.core.opt import Opt
+from parlai.utils.data import DatatypeHelper
+import parlai.core.tod.tod_core as tod
+import parlai.core.tod.tod_agents as tod_agents
+import parlai.tasks.multidogo.build as build_
+from parlai.tasks.multidogo.build import get_processed_multidogo_folder
+from parlai.tasks.multidogo.build import (
+    DOMAINS,
+    SENTENCE_INTENT,
+    TURN_INTENT,
+    TURN_AND_SENTENCE_INTENT,
+)
+
+# Every value accepted by `--intent-type` (used as `choices` in
+# `MultidogoParser.add_cmdline_args`).
+INTENT_ANNOTATION_TYPES = [SENTENCE_INTENT, TURN_INTENT, TURN_AND_SENTENCE_INTENT]
+
+
+class MultidogoParser(tod_agents.TodStructuredDataParser):
+    """
+    Converts processed MultiDoGo conversations into TOD structured episodes.
+
+    Every conversation becomes one episode. A run of customer turns followed by
+    a run of agent turns is merged into a single round; slot annotations on
+    customer turns populate the API call and slot annotations on agent turns
+    populate the API response.
+    """
+
+    @classmethod
+    def add_cmdline_args(
+        cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
+    ) -> ParlaiParser:
+        """
+        Register the MultiDoGo domain and intent-type options on top of the parent's.
+        """
+        parser = super().add_cmdline_args(parser, partial_opt)
+        parser.add_argument(
+            "--multidogo-domains",
+            nargs="+",
+            default=DOMAINS,
+            choices=DOMAINS,
+            help="Uses last passed in configuration.",
+        )
+        parser.add_argument(
+            "--intent-type",
+            default=TURN_INTENT,
+            choices=INTENT_ANNOTATION_TYPES,
+            help="Sets the type of intent classification labels. Sentence annotations represented as a list with adjacent entries of the same type deduped.",
+        )
+        return parser
+
+    def __init__(self, opt: Opt, shared=None):
+        """
+        Resolve the fold and data path, make sure the data is built, then defer to
+        the parent constructor.
+        """
+        self.fold = DatatypeHelper.fold(opt["datatype"])
+        self.dpath = os.path.join(opt["datapath"], "multidogo")
+        opt["datafile"] = self.fold
+        build_.build(opt)
+        super().__init__(opt, shared)
+
+    def setup_episodes(self, fold):
+        """
+        Build one ``TodStructuredEpisode`` per usable conversation.
+
+        Conversations that are empty or contain a turn without a ``role`` (i.e.
+        annotated turns with no matching raw text) are skipped. The ``fold``
+        argument is unused; the fold resolved in ``__init__`` is what selects the
+        data folder.
+        """
+        result = []
+        domains = self.opt.get("multidogo_domains", DOMAINS)
+        # argparse stores `--intent-type` under `intent_type`. The dashed key is
+        # kept only as a fallback for opts that were populated by hand.
+        intent_type = self.opt.get(
+            "intent_type", self.opt.get("intent-type", TURN_INTENT)
+        )
+        for _conv_id, domain, conversation in self._iterate_over_conversations(
+            domains, intent_type
+        ):
+            if len(conversation) == 0 or not all(
+                "role" in turn for turn in conversation.values()
+            ):
+                continue
+            rounds = []
+            first_turn = conversation["0"]
+            prev_role = first_turn["role"]
+            if prev_role == "customer":
+                user_utt = [first_turn["text"]]
+                all_calls = first_turn.get("slots", {})
+                api_resp = {}
+                sys_utt = []
+            else:
+                # The agent spoke first: pad the round with a silent user turn.
+                user_utt = ["__SILENCE__"]
+                all_calls = {}
+                api_resp = first_turn.get("slots", {})
+                sys_utt = [first_turn["text"]]
+            # NOTE(review): slots of an opening customer turn are recorded in
+            # `all_calls` (episode goal/schema) but not in the first round's API
+            # call. This mirrors the existing behavior; confirm it is intended.
+            api_call = {tod.STANDARD_API_NAME_SLOT: domain}
+            for i in range(1, len(conversation)):
+                turn = conversation[str(i)]
+                if prev_role == "agent" and prev_role != turn["role"]:
+                    # The customer speaks again after the agent: close the round.
+                    rounds.append(
+                        tod.TodStructuredRound(
+                            user_utt="\n".join(user_utt),
+                            api_call_machine=api_call,
+                            api_resp_machine=api_resp,
+                            sys_utt="\n".join(sys_utt),
+                        )
+                    )
+                    user_utt = []
+                    api_call = {tod.STANDARD_API_NAME_SLOT: domain}
+                    api_resp = {}
+                    sys_utt = []
+                prev_role = turn["role"]
+                slot = turn.get("slots", {})
+                if prev_role == "customer":
+                    user_utt.append(turn["text"])
+                    api_call.update(slot)
+                    all_calls.update(slot)
+                else:
+                    api_resp.update(slot)
+                    sys_utt.append(turn["text"])
+
+            # Flush the trailing round, joining utterances exactly like the
+            # rounds emitted inside the loop.
+            rounds.append(
+                tod.TodStructuredRound(
+                    user_utt="\n".join(user_utt),
+                    api_call_machine=api_call,
+                    api_resp_machine=api_resp,
+                    sys_utt="\n".join(sys_utt),
+                )
+            )
+            goal_calls = copy.deepcopy(all_calls)
+            goal_calls[tod.STANDARD_API_NAME_SLOT] = domain
+            result.append(
+                tod.TodStructuredEpisode(
+                    domain=domain,
+                    api_schemas_machine=[
+                        {
+                            tod.STANDARD_API_NAME_SLOT: domain,
+                            # Snapshot as a list rather than a live dict view.
+                            tod.STANDARD_OPTIONAL_KEY: list(all_calls.keys()),
+                        }
+                    ],
+                    goal_calls_machine=[goal_calls],
+                    rounds=rounds,
+                )
+            )
+        return result
+
+    def _iterate_over_conversations(self, domains, intent):
+        """
+        Yield ``(conversation_id, domain, conversation)`` for every processed JSON
+        file of the requested domains, for this teacher's fold and intent type.
+        """
+        for domain in domains:
+            data_folder = get_processed_multidogo_folder(
+                self.dpath, domain, self.fold, intent
+            )
+            # os.listdir order is arbitrary; sort so episode order is stable.
+            for filename in sorted(os.listdir(data_folder)):
+                if filename.endswith(".json"):
+                    with open(
+                        os.path.join(data_folder, filename), encoding="utf-8"
+                    ) as f:
+                        data = json.load(f)
+                        for conv_id, value in data.items():
+                            yield conv_id, domain, value
+
+    def get_id_task_prefix(self):
+        """
+        Return the task-name prefix for this dataset's teachers.
+        """
+        return "Multidogo"
+
+
+class SystemTeacher(MultidogoParser, tod_agents.TodSystemTeacher):
+    """
+    MultiDoGo teacher combining ``MultidogoParser`` with ``TodSystemTeacher``.
+
+    Adds no behavior of its own; everything comes from the two base classes.
+    """
+
+    pass
+
+
+class UserSimulatorTeacher(MultidogoParser, tod_agents.TodUserSimulatorTeacher):
+    """
+    MultiDoGo teacher combining ``MultidogoParser`` with ``TodUserSimulatorTeacher``.
+
+    Adds no behavior of its own; everything comes from the two base classes.
+    """
+
+    pass
+
+
+class DefaultTeacher(SystemTeacher):
+    """
+    Default teacher for the task; identical to ``SystemTeacher``.
+    """
+
+    pass
diff --git a/parlai/tasks/multidogo/build.py b/parlai/tasks/multidogo/build.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import parlai.core.build_data as build_data
+from parlai.core.build_data import DownloadableFile
+
+import csv
+from itertools import islice
+from pathlib import Path
+import os
+import json
+import re
+
+# When True, `_aggregate_and_write_conversations` prints every annotated
+# conversation id whose raw text is absent from the unannotated data.
+DEBUG_MISSING_RAW_CONVERSATIONS = False  # Unnecessary once Amazon fixes multidogo
+
+# Archive of the upstream dataset repository, fetched by `build`.
+RESOURCE = DownloadableFile(
+    "https://github.com/awslabs/multi-domain-goal-oriented-dialogues-dataset/archive/master.zip",
+    "raw_data.zip",
+    "fb59c7261da2d30d9d24b9af309ebb4bf0e5b39f97d718201a7160e591e76a3c",
+    zipped=True,
+)
+
+# Path of the data directory inside the downloaded archive, relative to the
+# multidogo data path.
+RAW_DATA_PREFIX = "multi-domain-goal-oriented-dialogues-dataset-master/data/"
+
+# Sub-directories of RAW_DATA_PREFIX holding the annotated splits and the
+# unannotated per-domain conversation text.
+RAW_DATA_ANNOTATED_DATA_PATH = "paper_splits"
+RAW_DATA_UNANNOTATED_DATA_PATH = "unannotated"
+
+# Values of the `intent_type` option.
+TURN_INTENT = "turn"
+SENTENCE_INTENT = "sentence"
+TURN_AND_SENTENCE_INTENT = "both"
+
+# Sub-directories of the annotated data, one per annotation granularity.
+RAW_DATA_SENTENCE_INTENT_PATH = "splits_annotated_at_sentence_level"
+RAW_DATA_TURN_INTENT_PATH = "splits_annotated_at_turn_level"
+
+# Maps a fetchable intent type to its raw-data directory. "both" has no entry:
+# `_preprocess` handles it as a sentence pass followed by a turn pass.
+RAW_DATA_INTENT_BY_TYPE_PATH = {
+    TURN_INTENT: RAW_DATA_TURN_INTENT_PATH,
+    SENTENCE_INTENT: RAW_DATA_SENTENCE_INTENT_PATH,
+}
+
+# All domains in the dataset; also the default for the domain options.
+DOMAINS = ["airline", "fastfood", "finance", "insurance", "media", "software"]
+
+# Maps a fold name to the raw split file name ("valid" is stored as dev.tsv).
+DATATYPE_TO_RAW_DATA_FILE_NAME = {
+    "test": "test.tsv",
+    "train": "train.tsv",
+    "valid": "dev.tsv",
+}
+
+# Directory (under the multidogo data path) that receives the processed output.
+PROCESSED = "processed/"
+
+
+def _preprocess(opt, datapath, datatype):
+    """
+    Collate MultiDoGo annotations and raw utterances into per-domain JSON files.
+
+    MultiDoGo conversations take place between an "agent" and a "customer". The
+    labeled customer data lives in one set of files and the full conversation
+    text (agent turns included) in another, linked only by a conversation ID
+    and not listed in a consistent order. Since the two have to be joined
+    anyway, the result is written out in a form that is easier to consume.
+
+    Output goes to <datapath>/processed/<domain>/<intent_type>/<datatype>/ as
+    numbered ``.json`` files. A ``.build`` marker file in that folder records
+    that the combination is done; combinations with a marker are skipped.
+    """
+    intent_type = opt.get("intent_type", TURN_INTENT)
+    needs_sentence_pass = intent_type in (TURN_AND_SENTENCE_INTENT, SENTENCE_INTENT)
+    needs_turn_pass = intent_type in (TURN_AND_SENTENCE_INTENT, TURN_INTENT)
+
+    for domain in opt.get("domains", DOMAINS):
+        # A dummy marker file tells us which domain/datatype combos exist.
+        marker_path = _get_processed_multidogo_built_file(
+            datapath, domain, datatype, intent_type
+        )
+        if os.path.isfile(marker_path):
+            continue
+        print(
+            f"    Preprocessing '{domain}' data for '{datatype}' with '{intent_type}' intent labels."
+        )
+
+        target_dir = get_processed_multidogo_folder(
+            datapath, domain, datatype, intent_type
+        )
+        Path(target_dir).mkdir(parents=True, exist_ok=True)
+
+        # The conversation text for *all* datatypes sits in one file per
+        # domain. Index it once (conversation id -> row span) so individual
+        # conversations can be located later.
+        span_by_conversation = _build_conversation_span_map(
+            _get_unannotated_tsv_data(datapath, domain)
+        )
+
+        # For "both", the sentence pass runs first and the turn pass then
+        # skips every conversation the sentence pass already handled, so no
+        # conversation receives two sets of labels.
+        next_file_idx = 0
+        handled_ids = set()
+        if needs_sentence_pass:
+            next_file_idx, handled_ids = _aggregate_and_write_conversations(
+                intent_type,
+                SENTENCE_INTENT,
+                datapath,
+                domain,
+                datatype,
+                span_by_conversation,
+                start_file_idx=next_file_idx,
+                skip_ids=set(),
+            )
+
+        if needs_turn_pass:
+            _aggregate_and_write_conversations(
+                intent_type,
+                TURN_INTENT,
+                datapath,
+                domain,
+                datatype,
+                span_by_conversation,
+                start_file_idx=next_file_idx,
+                skip_ids=handled_ids,
+            )
+
+        # Record that this combination has been built.
+        with open(marker_path, "a"):
+            pass
+
+
+def get_processed_multidogo_folder(datapath, domain, datatype, intent_type):
+    """
+    Return the folder holding processed data for one domain/intent/fold combo.
+
+    Layout: <datapath>/<PROCESSED>/<domain>/<intent_type>/<datatype>.
+    """
+    path_parts = (datapath, PROCESSED, domain, intent_type, datatype)
+    return os.path.join(*path_parts)
+
+
+def _get_processed_multidogo_built_file(datapath, domain, datatype, intent_type):
+    """
+    Return the path of the ``.build`` marker inside the processed-data folder
+    of the given domain/intent/fold combination.
+    """
+    processed_dir = get_processed_multidogo_folder(
+        datapath, domain, datatype, intent_type
+    )
+    return os.path.join(processed_dir, ".build")
+
+
+# unannotated data is RAW_DATA_PREFIX + RAW_DATA_UNANNOTATED_DATA_PATH + <domain> + '.tsv'
+# annotated data is RAW_DATA_PREFIX + RAW_DATA_ANNOTATED_DATA_PATH + <annotation type dir> + <domain> + '/' + <raw split file name>
+def _get_unannotated_tsv_data(datapath, domain):
+    """
+    Return an iterator over the rows of a domain's unannotated conversation file.
+
+    The file is opened lazily on first iteration and closed as soon as the
+    rows are exhausted (or the iterator is discarded), so no handle is leaked.
+    """
+    file_name = os.path.join(
+        datapath, RAW_DATA_PREFIX, RAW_DATA_UNANNOTATED_DATA_PATH, domain + ".tsv"
+    )
+
+    def _rows():
+        # Explicit encoding: the platform default is not UTF-8 everywhere.
+        with open(file_name, "r", encoding="utf-8") as tsv_file:
+            # comma-separated despite the .tsv extension
+            yield from csv.reader(tsv_file, delimiter=",")
+
+    return _rows()
+
+
+def _get_annotated_tsv_data(datapath, domain, datatype, annotation_type):
+    """
+    Return an iterator over the rows (header included) of an annotated split.
+
+    ``annotation_type`` must be a key of ``RAW_DATA_INTENT_BY_TYPE_PATH`` and
+    ``datatype`` a key of ``DATATYPE_TO_RAW_DATA_FILE_NAME``; an unknown value
+    raises ``KeyError`` immediately. The file itself is opened lazily on first
+    iteration and closed once the rows are exhausted or the iterator is
+    discarded, so no handle is leaked.
+    """
+    file_name = os.path.join(
+        datapath,
+        RAW_DATA_PREFIX,
+        RAW_DATA_ANNOTATED_DATA_PATH,
+        RAW_DATA_INTENT_BY_TYPE_PATH[annotation_type],
+        domain,
+        DATATYPE_TO_RAW_DATA_FILE_NAME[datatype],
+    )
+
+    def _rows():
+        # Explicit encoding: the platform default is not UTF-8 everywhere.
+        with open(file_name, "r", encoding="utf-8") as tsv_file:
+            yield from csv.reader(tsv_file, delimiter="\t")
+
+    return _rows()
+
+
+def _build_conversation_span_map(unannotated_tsv_object):
+    """
+    Index an unannotated-data row iterator by conversation.
+
+    Returns a dict mapping conversation id -> ``(start_row, row_count)``, where
+    rows are counted as yielded by the iterator. Rows of one conversation are
+    expected to be contiguous; if an id reappears later, the later span
+    replaces the earlier one. An empty iterator yields an empty dict.
+    """
+    result = {}
+    start = 0
+    length = 0
+    prev_conversation_id = None  # nothing seen yet
+    for i, row in enumerate(unannotated_tsv_object):
+        # The unannotated file wraps the id in extra filler: 4 leading and
+        # 2 trailing characters.
+        conversation_id = row[0][4:-2]
+        if conversation_id != prev_conversation_id:
+            if length:
+                # Only record spans that actually contain rows.
+                result[prev_conversation_id] = (start, length)
+            start = i
+            prev_conversation_id = conversation_id
+            length = 0
+        length += 1
+    if length:
+        result[prev_conversation_id] = (start, length)
+    return result
+
+
+def _get_slots_map(utterance, slot_string):
+    """
+    Pair each word of ``utterance`` with its slot label and return the slots.
+
+    ``slot_string`` holds one space-separated label per word, with ``"O"``
+    meaning "no slot". Punctuation is stripped from the utterance before it is
+    split. When a label occurs on several words, the last word wins. If the
+    numbers of words and labels differ, the surplus on either side is ignored
+    rather than raising.
+    """
+    labels = slot_string.split(" ")
+    words = re.sub(r"[^\w\s]", "", utterance).split(" ")
+    result = {}
+    # zip stops at the shorter sequence, so a missing label cannot raise.
+    for label, word in zip(labels, words):
+        if label != "O":
+            result[label] = word
+    return result
+
+
+def _aggregate_and_write_conversations(
+    raw_intent_type,
+    fetch_intent_type,
+    datapath,
+    domain,
+    datatype,
+    unannotated_id_map,
+    skip_ids,
+    start_file_idx=0,
+):
+    """
+    Join one annotated split with its raw text and dump the result as JSON.
+
+    Reads the ``fetch_intent_type`` annotations for ``domain``/``datatype``,
+    attaches the raw utterances of every conversation not listed in
+    ``skip_ids``, and writes everything to ``<start_file_idx>.json`` in the
+    processed folder of ``raw_intent_type``. Conversations without raw text
+    are dropped and their ids added to ``skip_ids`` (which is mutated).
+
+    Returns ``(next_file_idx, seen_conversation_ids)``.
+    """
+    target_dir = get_processed_multidogo_folder(
+        datapath, domain, datatype, raw_intent_type
+    )
+    collected = {}  # conversationId -> {turn number -> turn data}
+    visited_ids = set()
+    annotated_rows = _get_annotated_tsv_data(
+        datapath, domain, datatype, fetch_intent_type
+    )
+    next(annotated_rows)  # the first line is the header; discard it
+    for row in annotated_rows:
+        conv_id = row[0]
+        if conv_id in skip_ids:
+            continue
+        if conv_id not in visited_ids:
+            # First row of this conversation: pull in its raw text.
+            collected[conv_id] = {}
+            has_raw_text = _add_utterances(
+                unannotated_id_map, conv_id, collected, datapath, domain
+            )
+            visited_ids.add(conv_id)
+            if not has_raw_text:
+                if DEBUG_MISSING_RAW_CONVERSATIONS:
+                    print(f"Could not find raw conversations for {conv_id}")
+                skip_ids.add(conv_id)
+                collected.pop(conv_id, None)
+                continue
+        if fetch_intent_type == TURN_INTENT:
+            _get_turn_labels_and_slots_map(row, collected)
+        elif fetch_intent_type == SENTENCE_INTENT:
+            _get_sentence_labels_and_slots_map(row, collected)
+        else:
+            raise KeyError(
+                "Invalid `fetch_intent_type`. This case should never be hit. Something is broken in the `build.py` file."
+            )
+    # Everything collected in this pass goes into a single file.
+    with open(f"{target_dir}/{start_file_idx}.json", "w+") as out_file:
+        json.dump(collected, out_file, indent=4)
+    # Hand back what the next pass needs.
+    return start_file_idx + 1, visited_ids
+
+
+def _add_utterances(
+    unannotated_id_map, conversation_id, conversations_to_write, datapath, domain
+):
+    """
+    Copy a conversation's raw turns into ``conversations_to_write``.
+
+    Looks up the conversation's row span in ``unannotated_id_map``, reads
+    those rows from the domain's unannotated file, and stores each one under
+    its integer turn number as ``{"text": ..., "role": ...}``.
+
+    Returns False (writing nothing) when the conversation id is not in the
+    map, True otherwise.
+    """
+    try:
+        start, length = unannotated_id_map[conversation_id]
+    except KeyError:
+        return False
+    conversation_text = islice(
+        _get_unannotated_tsv_data(datapath, domain), start, start + length
+    )
+
+    # Insert in place instead of rebuilding the whole turn dict for every row.
+    turns = conversations_to_write.setdefault(conversation_id, {})
+    for line in conversation_text:
+        # Format of unannotated: conversationId,turnNumber,utteranceId,utterance,authorRole
+        turns[int(line[1])] = {"text": line[3], "role": line[4]}
+    return True
+
+
+def _get_sentence_labels_and_slots_map(labeled_line, output):
+    """
+    Fold one sentence-level annotation row into ``output``.
+
+    Slots found in the sentence are merged into the turn's ``"slots"`` dict, so
+    slots from earlier sentences of the same turn are kept. The sentence's
+    intent is appended to the turn's ``"intents"`` list unless it equals the
+    previous entry (adjacent duplicates are collapsed, as the ``--intent-type``
+    help text describes).
+
+    Raises ``RuntimeError`` if the conversation has no entry in ``output``.
+    """
+    # Sentence tsv format: conversationId   turnNumber  sentenceNumber  utteranceId utterance   slot-labels intent
+    conversation_id = labeled_line[0]
+    turn_number = int(float(labeled_line[1]))  # cause a few got saved as float.
+    if conversation_id not in output:
+        raise RuntimeError("Should never happen; raw conversation text should be here")
+    turn = output[conversation_id].setdefault(turn_number, {})
+    turn.setdefault("slots", {}).update(
+        _get_slots_map(labeled_line[4], labeled_line[5])
+    )
+    intents = turn.setdefault("intents", [])
+    intent = labeled_line[6]
+    if not intents or intents[-1] != intent:
+        intents.append(intent)
+
+
+def _get_turn_labels_and_slots_map(labeled_line, output):
+    """
+    Fold one turn-level annotation row into ``output``.
+
+    Sets the turn's ``"slots"`` to the slots found in the utterance and its
+    ``"intents"`` to a one-element list with the row's intent, keeping any
+    other data already stored for that turn.
+
+    Raises ``RuntimeError`` if the conversation has no entry in ``output``.
+    """
+    # Turn tsv format: conversationId  turnNumber  utteranceId utterance   slot-labels intent
+    conv_id = labeled_line[0]
+    turn_idx = int(float(labeled_line[1]))  # a few turn numbers were saved as floats
+    if conv_id not in output:
+        raise RuntimeError("Should never happen; raw conversation text should be here")
+    turns = output[conv_id]
+    updated_turn = dict(turns.setdefault(turn_idx, {}))
+    updated_turn["slots"] = _get_slots_map(labeled_line[3], labeled_line[4])
+    updated_turn["intents"] = [labeled_line[5]]
+    turns[turn_idx] = updated_turn
+
+
+def build(opt):
+    """
+    Download the MultiDoGo data if needed and make sure it is preprocessed.
+
+    The download happens once per data version. Preprocessing is attempted on
+    every call: ``_preprocess`` keeps its own marker per domain/fold/intent
+    type and skips combinations already done, so this is cheap when nothing is
+    missing, while a newly requested intent type (or an earlier interrupted
+    run) still gets processed.
+    """
+    # get path to data directory
+    datapath = os.path.join(opt["datapath"], "multidogo")
+    # define version if any
+    version = "v1.1"
+
+    # check if data had been previously downloaded
+    if not build_data.built(datapath, version_string=version):
+        print("[building data: " + datapath + "]")
+
+        # make a clean directory if needed
+        if build_data.built(datapath):
+            # an older version exists, so remove these outdated files.
+            build_data.remove_dir(datapath)
+        build_data.make_dir(datapath)
+
+        # Download the data.
+        RESOURCE.download_file(datapath)
+
+        # mark the data as downloaded
+        build_data.mark_done(datapath, version_string=version)
+
+    # Collate the raw files into per-conversation JSON. Deliberately outside
+    # the download check: see the docstring.
+    for fold in ["train", "valid", "test"]:
+        _preprocess(opt, datapath, fold)
diff --git a/parlai/tasks/multidogo/test.py b/parlai/tasks/multidogo/test.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from parlai.utils.testing import AutoTeacherTest
+
+
+class TestSystemTeacher(AutoTeacherTest):
+    """
+    Regression test for the ``multidogo:SystemTeacher`` task.
+    """
+
+    task = "multidogo:SystemTeacher"
+
+
+class TestUserSimulatorTeacher(AutoTeacherTest):
+    """
+    Regression test for the ``multidogo:UserSimulatorTeacher`` task.
+    """
+
+    task = "multidogo:UserSimulatorTeacher"
diff --git a/parlai/tasks/multidogo/test/multidogo_SystemTeacher_test.yml b/parlai/tasks/multidogo/test/multidogo_SystemTeacher_test.yml
@@ -0,0 +1,47 @@
+acts:
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'APIS: '
+    id: Multidogo_SystemTeacher
+    slots: {}
+    text: 'APIS: '
+    type: 'APIS: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'APICALL: api_name = airline'
+    id: Multidogo_SystemTeacher
+    slots:
+      api_name: airline
+    text: 'USER: HELLO ROBIN'
+    type: 'APICALL: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'SYSTEM: Hello! Good morning. You''ve reached LMT Airways. How may I assist
+      you today?'
+    id: Multidogo_SystemTeacher
+    slots: {}
+    text: 'APIRESP: '
+    type: 'SYSTEM: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'APICALL: api_name = airline'
+    id: Multidogo_SystemTeacher
+    slots:
+      api_name: airline
+    text: 'USER: I NEED BOARDING PASS '
+    type: 'APICALL: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'SYSTEM: Awesome! I''d be glad to help you with that. May I know your last name
+      please?'
+    id: Multidogo_SystemTeacher
+    slots: {}
+    text: 'APIRESP: '
+    type: 'SYSTEM: '
+num_episodes: 2316
+num_examples: 43104
diff --git a/parlai/tasks/multidogo/test/multidogo_SystemTeacher_train.yml b/parlai/tasks/multidogo/test/multidogo_SystemTeacher_train.yml
@@ -0,0 +1,49 @@
+acts:
+- - domain: airline
+    episode_done: false
+    id: Multidogo_SystemTeacher
+    labels:
+    - 'APIS: '
+    slots: {}
+    text: 'APIS: '
+    type: 'APIS: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_SystemTeacher
+    labels:
+    - 'APICALL: api_name = airline'
+    slots:
+      api_name: airline
+    text: 'USER: __SILENCE__'
+    type: 'APICALL: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_SystemTeacher
+    labels:
+    - 'SYSTEM: Welcome to High flying customer service! You''re connected to our customer
+      associate! Good morning! My name is Sam, How may I help you?'
+    slots: {}
+    text: 'APIRESP: '
+    type: 'SYSTEM: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_SystemTeacher
+    labels:
+    - 'APICALL: api_name = airline'
+    slots:
+      api_name: airline
+    text: 'USER: HI,GOOD MORNING
+
+      I WANTS TO BOOK A TICKET FOR FLIGHT'
+    type: 'APICALL: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_SystemTeacher
+    labels:
+    - 'SYSTEM: Absolutely!! I''d be happy to book your tickets, I''d request to please
+      share your details with me. May I know your full name please?'
+    slots: {}
+    text: 'APIRESP: '
+    type: 'SYSTEM: '
+num_episodes: 15616
+num_examples: 290050
diff --git a/parlai/tasks/multidogo/test/multidogo_SystemTeacher_valid.yml b/parlai/tasks/multidogo/test/multidogo_SystemTeacher_valid.yml
@@ -0,0 +1,47 @@
+acts:
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'APIS: '
+    id: Multidogo_SystemTeacher
+    slots: {}
+    text: 'APIS: '
+    type: 'APIS: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'APICALL: api_name = airline'
+    id: Multidogo_SystemTeacher
+    slots:
+      api_name: airline
+    text: 'USER: HI GOOD MORNING'
+    type: 'APICALL: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'SYSTEM: Welcome to High flying customer service! You''re connected to our customer
+      associate! Good morning! My name is Sam, How may I help you?'
+    id: Multidogo_SystemTeacher
+    slots: {}
+    text: 'APIRESP: '
+    type: 'SYSTEM: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'APICALL: api_name = airline'
+    id: Multidogo_SystemTeacher
+    slots:
+      api_name: airline
+    text: 'USER: I WANT TO BOOK A TICKET IN FLIGHT'
+    type: 'APICALL: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'SYSTEM: Absolutely!! I''d be happy to book your tickets, I''d request to please
+      share your details with me. May I know your full name please?'
+    id: Multidogo_SystemTeacher
+    slots: {}
+    text: 'APIRESP: '
+    type: 'SYSTEM: '
+num_episodes: 1590
+num_examples: 29662
diff --git a/parlai/tasks/multidogo/test/multidogo_UserSimulatorTeacher_test.yml b/parlai/tasks/multidogo/test/multidogo_UserSimulatorTeacher_test.yml
@@ -0,0 +1,46 @@
+acts:
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: HELLO ROBIN'
+    id: Multidogo_UserSimulatorTeacher
+    text: 'GOAL: api_name = airline ; booking_confirmation_number = 523 ; email_address
+      = gmailcom ; name = mohan'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: I NEED BOARDING PASS '
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: Hello! Good morning. You''ve reached LMT Airways. How may I assist
+      you today?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: MOHAN'
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: Awesome! I''d be glad to help you with that. May I know your last
+      name please?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: CONFIRMATION NUMBER : moh523'
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: Alright Mohan! Could you please share the booking confirmation
+      number?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: Mohan283@gmail.com'
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: Great! May I have your email address please?'
+    type: 'USER: '
+num_episodes: 2316
+num_examples: 43104
diff --git a/parlai/tasks/multidogo/test/multidogo_UserSimulatorTeacher_train.yml b/parlai/tasks/multidogo/test/multidogo_UserSimulatorTeacher_train.yml
@@ -0,0 +1,51 @@
+acts:
+- - domain: airline
+    episode_done: false
+    id: Multidogo_UserSimulatorTeacher
+    labels:
+    - 'USER: __SILENCE__'
+    text: 'GOAL: api_name = airline ; arrival_city = singapore ; departure_city =
+      thailand ; email_address = kavigmailcom ; name = kavisri ; number_of_passengers
+      = five'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_UserSimulatorTeacher
+    labels:
+    - 'USER: HI,GOOD MORNING
+
+      I WANTS TO BOOK A TICKET FOR FLIGHT'
+    slots: {}
+    text: 'SYSTEM: Welcome to High flying customer service! You''re connected to our
+      customer associate! Good morning! My name is Sam, How may I help you?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_UserSimulatorTeacher
+    labels:
+    - 'USER: KAVISRI'
+    slots: {}
+    text: 'SYSTEM: Absolutely!! I''d be happy to book your tickets, I''d request to
+      please share your details with me. May I know your full name please?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_UserSimulatorTeacher
+    labels:
+    - 'USER: THAILAND AND SINGAPORE'
+    slots: {}
+    text: 'SYSTEM: It''s nice meeting you kavisri! Could you please share your departure
+      and arrival city?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    id: Multidogo_UserSimulatorTeacher
+    labels:
+    - 'USER: OK'
+    slots: {}
+    text: 'SYSTEM: Perfect! I hope you enjoy the trip! As I''ve checked with my system
+      here, there is one flight of Jet airways operating on 09/20/2018, The timings
+      are, 6:00 Am to 8:00 Am and it is costing you $170 per head. '
+    type: 'USER: '
+num_episodes: 15616
+num_examples: 290050
diff --git a/parlai/tasks/multidogo/test/multidogo_UserSimulatorTeacher_valid.yml b/parlai/tasks/multidogo/test/multidogo_UserSimulatorTeacher_valid.yml
@@ -0,0 +1,47 @@
+acts:
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: HI GOOD MORNING'
+    id: Multidogo_UserSimulatorTeacher
+    text: 'GOAL: api_name = airline ; arrival_city = chennai ; departure_city = mumbai
+      ; email_address = gmailcom ; name = viswa ; start_date = 92018'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: I WANT TO BOOK A TICKET IN FLIGHT'
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: Welcome to High flying customer service! You''re connected to our
+      customer associate! Good morning! My name is Sam, How may I help you?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: VISWA '
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: Absolutely!! I''d be happy to book your tickets, I''d request to
+      please share your details with me. May I know your full name please?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: MUMBAI TO CHENNAI'
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: Great! It''s nice meeting you Viswa! Could you please share your
+      departure and arrival city?'
+    type: 'USER: '
+- - domain: airline
+    episode_done: false
+    eval_labels:
+    - 'USER: 09/20/2018'
+    id: Multidogo_UserSimulatorTeacher
+    slots: {}
+    text: 'SYSTEM: That''s amazing! I recently visited chennai, It''s such a beautiful
+      place! I hope you enjoy the trip! May I know your preferred date please?'
+    type: 'USER: '
+num_episodes: 1590
+num_examples: 29662