diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/__init__.py b/parlai/crowdsourcing/tasks/qa_data_collection/__init__.py
new file mode 100644
index 00000000000..240697e3247
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/conf/example.yaml b/parlai/crowdsourcing/tasks/qa_data_collection/conf/example.yaml
new file mode 100644
index 00000000000..4037c153714
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/conf/example.yaml
@@ -0,0 +1,17 @@
+#@package _global_
+mephisto:
+ blueprint:
+ world_file: ${task_dir}/worlds.py
+ task_description_file: ${task_dir}/task_description.html
+ custom_source_bundle: ${task_dir}/webapp/build/bundle.js
+ num_conversations: 1
+ task:
+ task_name: parlai-qa-example
+ task_title: "Test ParlAI QA Data Collection Task"
+ task_description: >
+ This is a ParlAI data collection task.
+ task_reward: 0.3
+ task_tags: "dynamic,question answering,testing"
+teacher:
+ task: squad:SquadQATeacher
+ datatype: train
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/run.py b/parlai/crowdsourcing/tasks/qa_data_collection/run.py
new file mode 100644
index 00000000000..64d5bc4bd2e
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/run.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import os
+from dataclasses import dataclass, field
+from itertools import chain
+from typing import List, Any
+
+import hydra
+from omegaconf import DictConfig
+from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import (
+ BLUEPRINT_TYPE,
+ SharedParlAITaskState,
+)
+from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
+from mephisto.operations.operator import Operator
+from mephisto.tools.scripts import load_db_and_process_config
+
+from parlai.agents.repeat_label.repeat_label import RepeatLabelAgent
+from parlai.core.params import ParlaiParser
+from parlai.core.worlds import create_task
+
+
+# Absolute path of the directory holding this script; exposed to the YAML config
+# through the ``task_dir`` field so it can be interpolated as ${task_dir}.
+TASK_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
+
+# Defaults list for the script config: one single-entry mapping per config
+# group, kept in this order.
+defaults = [
+    {group: choice}
+    for group, choice in (
+        ("mephisto/blueprint", BLUEPRINT_TYPE),
+        ("mephisto/architect", "local"),
+        ("mephisto/provider", "mock"),
+        ("conf", "example"),
+    )
+]
+
+
+@dataclass
+class TeacherConfig:
+    """
+    Options describing the ParlAI teacher that supplies passages.
+
+    Each field name/value pair is forwarded to the ParlAI argument parser as
+    ``--<name> <value>``.
+    """
+
+    # Teacher specification, forwarded as ``--task``.
+    task: str = field(
+        default="squad:SquadQATeacher",
+        metadata=dict(help=""),
+    )
+    # Data split, forwarded as ``--datatype``.
+    datatype: str = field(
+        default="train",
+        metadata=dict(help=""),
+    )
+
+
+@dataclass
+class TestScriptConfig(RunScriptConfig):
+    """
+    Script-level config for the QA data collection task.
+
+    Extends ``RunScriptConfig`` with the task directory, the per-turn timeout and
+    the teacher options.
+    """
+
+    defaults: List[Any] = field(default_factory=lambda: defaults)
+    # Directory of this task; referenced from the YAML config as ${task_dir}.
+    task_dir: str = TASK_DIRECTORY
+    turn_timeout: int = field(
+        default=300,
+        metadata={
+            "help": "Maximum response time before kicking "
+            "a worker out, default 300 seconds"
+        },
+    )
+    # Build the default through a factory instead of sharing one
+    # ``TeacherConfig()`` instance: dataclass instances are unhashable by default,
+    # and Python 3.11+ rejects unhashable field defaults with ``ValueError``.
+    teacher: TeacherConfig = field(default_factory=TeacherConfig)
+
+
+# Register the config class under the name passed to ``@hydra.main`` below.
+register_script_config(name="scriptconfig", module=TestScriptConfig)
+
+
+@hydra.main(config_name="scriptconfig")
+def main(cfg: DictConfig) -> None:
+    """
+    Launch the QA data collection task through Mephisto.
+
+    Builds a ParlAI teacher from ``cfg.teacher``, hands it to the task world via
+    the shared task state, then runs the operator and waits for the runs before
+    shutting down.
+
+    :param cfg:
+        config registered as ``scriptconfig``.
+    :raises AssertionError:
+        if a custom source bundle is configured but the file does not exist.
+    """
+    db, cfg = load_db_and_process_config(cfg)
+
+    # Turn every teacher option into a ``--key value`` pair. argparse only
+    # accepts strings, so coerce values in case a non-string option is configured.
+    teacher_args = []
+    for key, value in cfg.teacher.items():
+        teacher_args.extend(("--" + key, str(value)))
+
+    parser = ParlaiParser(True, False)
+    opt = parser.parse_args(teacher_args)
+    agent = RepeatLabelAgent(opt)
+    teacher = create_task(opt, agent).get_task_agent()
+
+    world_opt = {"turn_timeout": cfg.turn_timeout, "teacher": teacher}
+
+    custom_bundle_path = cfg.mephisto.blueprint.get("custom_source_bundle", None)
+    if custom_bundle_path is not None:
+        # Raise explicitly rather than via ``assert`` so the check still runs
+        # under ``python -O``; the exception type is unchanged.
+        if not os.path.exists(custom_bundle_path):
+            raise AssertionError(
+                "Must build the custom bundle with `npm install; npm run dev` from within "
+                f"the {TASK_DIRECTORY}/webapp directory in order to demo a custom bundle "
+            )
+        world_opt["send_task_data"] = True
+
+    # The same options dict is passed for both the task and onboarding worlds.
+    shared_state = SharedParlAITaskState(
+        world_opt=world_opt, onboarding_world_opt=world_opt
+    )
+
+    operator = Operator(db)
+
+    operator.validate_and_run_config(cfg.mephisto, shared_state)
+    operator.wait_for_runs_then_shutdown(skip_input=True, log_rate=30)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/task_description.html b/parlai/crowdsourcing/tasks/qa_data_collection/task_description.html
new file mode 100644
index 00000000000..9a85fad4d20
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/task_description.html
@@ -0,0 +1,15 @@
+
If you see this, it means the task description has loaded successfully
+
+
What is this task?
+
+ In this task, you'll provide a question and the corresponding answer from the passage provided.
+
+
+
Task Description Information!
+
+ Task Descriptions can take arbitrary HTML if you want... It can be great for
+ tasks that require simple formatting and descriptions, but if you want
+ to do anything more complex it's better to copy the
+ blueprints/parlai_chat/source directory, override relevant
+ components, build, and then use the --custom-source-bundle flag.
+
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/.babelrc b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/.babelrc
new file mode 100644
index 00000000000..5507f2e868d
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/.babelrc
@@ -0,0 +1,4 @@
+{
+ "presets": ["@babel/env", "@babel/preset-react"],
+ "plugins": ["@babel/plugin-proposal-class-properties"]
+}
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/package.json b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/package.json
new file mode 100644
index 00000000000..f3cd904e5cd
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/package.json
@@ -0,0 +1,45 @@
+{
+ "name": "parlai-mturk-task-compiler",
+ "version": "1.0.0",
+ "description": "",
+ "main": "webpack.config.js",
+ "scripts": {
+ "dev": "webpack --mode development -q"
+ },
+ "keywords": [],
+ "author": "",
+ "dependencies": {
+ "bootstrap": "^4.3.1",
+ "bootstrap-chat": "^1.0.7",
+ "mephisto-task": "^1.0.13",
+ "rc-slider": "^8.6.3",
+ "react": "16.13.1",
+ "react-bootstrap": "^0.32.4",
+ "react-dom": "16.13.1",
+ "react-table": "^6.8.6",
+ "react-fluid-textarea": "0.0.2"
+ },
+ "devDependencies": {
+ "@babel/cli": "^7.1.0",
+ "@babel/core": "^7.1.0",
+ "@babel/plugin-proposal-class-properties": "^7.1.0",
+ "@babel/preset-env": "^7.1.0",
+ "@babel/preset-react": "^7.0.0",
+ "@typescript-eslint/eslint-plugin": "^2.34.0",
+ "@typescript-eslint/parser": "^2.34.0",
+ "babel-eslint": "^10.1.0",
+ "babel-loader": "^8.0.2",
+ "css-loader": "^1.0.0",
+ "eslint": "^6.8.0",
+ "eslint-config-react-app": "^5.2.1",
+ "eslint-plugin-flowtype": "^4.7.0",
+ "eslint-plugin-import": "^2.20.2",
+ "eslint-plugin-jsx-a11y": "^6.2.3",
+ "eslint-plugin-react": "^7.20.0",
+ "eslint-plugin-react-hooks": "^2.5.1",
+ "style-loader": "^0.23.0",
+ "url-loader": "^2.0.1",
+ "webpack": "^4.19.1",
+ "webpack-cli": "^3.1.1"
+ }
+}
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/main.js b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/main.js
new file mode 100644
index 00000000000..d1ce40f49be
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/main.js
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017-present, Facebook, Inc.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+import React from "react";
+import ReactDOM from "react-dom";
+import "bootstrap-chat/styles.css";
+import ResizableTextArea from 'react-fluid-textarea';
+
+import { ChatApp, ChatMessage, DefaultTaskDescription } from "bootstrap-chat";
+
+function RenderChatMessage({ message, mephistoContext, appContext, idx }) {
+ const { agentId } = mephistoContext;
+ const { currentAgentNames } = appContext.taskContext;
+
+ return (
+
+ );
+}
+
+function logSelection(event) {
+  // Log the currently highlighted span of the event target's text.
+  const { value, selectionStart, selectionEnd } = event.target;
+  console.log(value.substring(selectionStart, selectionEnd));
+}
+
+function Passage({passage}) {
+
+ // Formatting to make textarea look like div, span selection works best on textarea
+ const mystyle = {
+ outline: "none",
+ backgroundColor: "#dff0d8",
+ width: "100%",
+ border: "0px"
+ };
+ if (passage) {
+ return (
+
Passage
+
+
)
+ }
+ return null
+}
+
+
+function MainApp() {
+ const [passage, setPassage] = React.useState("");
+
+ // Currently no way to display task description without changing Mephisto files
+ return (
+ (
+
+ )}
+ renderSidePane={({ mephistoContext: { taskConfig } }) => (
+
+
+
+ )}
+ onMessagesChange={(messages) => {
+ if (messages.length > 0 && 'passage' in messages[messages.length - 1]) {
+ console.log("setting passage");
+ setPassage(messages[messages.length - 1].passage)
+ }
+ }}
+ />
+ );
+}
+
+ReactDOM.render(, document.getElementById("app"));
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/index.html b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/index.html
new file mode 100644
index 00000000000..5a1a77bfb4b
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/index.html
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+ MTurk Chat
+
+
+
+
+
+
+
+
+
+
+
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/notif.mp3 b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/notif.mp3
new file mode 100644
index 00000000000..baf6a4a4ada
Binary files /dev/null and b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/notif.mp3 differ
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/webpack.config.js b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/webpack.config.js
new file mode 100644
index 00000000000..0fd10cc913d
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/webpack.config.js
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+var path = require("path");
+var webpack = require("webpack");
+
+// Webpack configuration: bundles ./src/main.js into build/bundle.js.
+module.exports = {
+ entry: "./src/main.js",
+ output: {
+ path: __dirname,
+ filename: "build/bundle.js",
+ },
+ // Replace Node's `net` and `dns` modules with empty stubs in the bundle.
+ node: {
+ net: "empty",
+ dns: "empty",
+ },
+ resolve: {
+ // Pin `react` and `mephisto-task` to the copies under ./node_modules
+ // (resolved against the current working directory).
+ alias: {
+ react: path.resolve("./node_modules/react"),
+ "mephisto-task": path.resolve("./node_modules/mephisto-task"),
+ },
+ },
+ module: {
+ rules: [
+ // Transpile .js/.jsx sources (outside node_modules) with Babel.
+ {
+ test: /\.(js|jsx)$/,
+ loader: "babel-loader",
+ exclude: /node_modules/,
+ options: { presets: ["@babel/env"] },
+ },
+ // Load CSS through css-loader, then style-loader.
+ {
+ test: /\.css$/,
+ loader: "style-loader!css-loader",
+ },
+ // Handle images and fonts with url-loader (limit option set to 100000).
+ {
+ test: /\.(svg|png|jpe?g|ttf)$/,
+ loader: "url-loader?limit=100000",
+ },
+ // NOTE(review): .jpg files already match the url-loader rule above, and
+ // file-loader does not appear in package.json's devDependencies - confirm
+ // whether this rule is still needed.
+ {
+ test: /\.jpg$/,
+ loader: "file-loader",
+ },
+ ],
+ },
+};
diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/worlds.py b/parlai/crowdsourcing/tasks/qa_data_collection/worlds.py
new file mode 100644
index 00000000000..d5111f94279
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/qa_data_collection/worlds.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from parlai.crowdsourcing.utils.worlds import CrowdTaskWorld
+from parlai.core.worlds import validate
+
+
+class QADataCollectionWorld(CrowdTaskWorld):
+ """
+ World for recording a turker's question and answer given a context.
+
+ Assumes the context is a random context from a given task, e.g. from SQuAD, CBT,
+ etc.
+ """
+
+ # Sender id attached to every message this world shows to the worker.
+ collector_agent_id = 'QA Collector'
+
+ def __init__(self, opt, agent):
+ """
+ Store the teacher and the crowd agent and reset the collected state.
+
+ :param opt:
+ world options; ``opt["teacher"]`` is read here and ``opt["turn_timeout"]``
+ is read in ``parley``.
+ :param agent:
+ the crowd worker's agent; its ``agent_id`` is overwritten with "QA Agent".
+ """
+ self.teacher = opt["teacher"]
+ self.agent = agent
+ self.agent.agent_id = "QA Agent"
+ self.episodeDone = False
+ # Filled in as the conversation progresses.
+ self.context = None
+ self.question = None
+ self.answer = None
+ self.opt = opt
+
+ def parley(self):
+ """
+ Run one stage of the collection.
+
+ While no question is stored, show the teacher's passage with a prompt and
+ record the worker's reply as the question. Once a question is stored, prompt
+ for the answer and record the reply; the episode is marked done after that
+ stage.
+ """
+
+ act = {'episode_done': False}
+ act['id'] = self.__class__.collector_agent_id
+
+ if not self.question:
+ """
+ First, the QA Collector agent provides the context and prompts the turker to
+ ask a question regarding the context.
+ """
+
+ # Get context from dataloader
+ passage = self.teacher.act()
+ self.context = passage['text']
+ # Sent as a separate 'passage' field alongside the prompt text.
+ act['passage'] = passage['text']
+
+ # Add a prompt telling the turker what to do next
+ act['text'] = 'Please provide a question given the passage.'
+ self.agent.observe(validate(act))
+ self.question = self.agent.act(timeout=self.opt["turn_timeout"])
+ # Can log the turker's question here
+ # Stop here; the answer is collected on the next parley call.
+ return
+
+ if not self.answer:
+ """
+ Next, the QA Collector collects the turker's question, and then prompts the
+ turker to provide the answer.
+ """
+
+ # A prompt telling the turker what to do next
+ act['text'] = 'Thanks. And what is the answer to your question?'
+
+ self.agent.observe(validate(act))
+ self.answer = self.agent.act(timeout=self.opt["turn_timeout"])
+
+ self.episodeDone = True
+
+ def episode_done(self):
+ """
+ Return the done flag maintained by ``parley``.
+ """
+ return self.episodeDone
+
+
+def make_world(opt, agents):
+    """
+    Build the QA collection world around the first agent in ``agents``.
+    """
+    worker = agents[0]
+    return QADataCollectionWorld(opt, worker)
+
+
+def get_world_params():
+    """
+    Return the world parameters: this task uses a single agent.
+    """
+    return dict(agent_count=1)
diff --git a/parlai/tasks/squad/agents.py b/parlai/tasks/squad/agents.py
index ac8b57846af..1a42a49302e 100644
--- a/parlai/tasks/squad/agents.py
+++ b/parlai/tasks/squad/agents.py
@@ -4,14 +4,17 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-from parlai.core.teachers import FixedDialogTeacher, DialogTeacher, ParlAIDialogTeacher
-from parlai.utils.io import PathManager
-from .build import build
-
import copy
import json
import os
+from parlai.core.message import Message
+from parlai.core.opt import Opt
+from parlai.core.teachers import FixedDialogTeacher, DialogTeacher, ParlAIDialogTeacher
+from parlai.tasks.wrapper.agents import AbstractWrapperTeacher
+from parlai.utils.io import PathManager
+from .build import build
+
def get_sentence_tokenizer():
"""
@@ -372,3 +375,25 @@ def get(self, episode_idx, entry_idx=None):
del action['context']
return action
+
+
+class SquadQATeacher(AbstractWrapperTeacher):
+    """
+    Wrapper teacher over SQuAD that keeps only the passage and drops the question.
+    """
+
+    @classmethod
+    def add_cmdline_args(cls, parser):
+        """
+        Default the wrapped task to SQuAD.
+        """
+        parser.set_defaults(wrapper_task='squad')
+
+    def __init__(self, opt: Opt, shared=None):
+        super().__init__(opt, shared)
+
+    def _edit_action(self, act: Message) -> Message:
+        """
+        Trim the message text down to its first line.
+
+        SQuAD returns the passage and the question together; only the passage
+        (the text before the first newline) is required for this task.
+        """
+        first_line, _, _ = act['text'].partition('\n')
+        act.force_set('text', first_line)
+        return act