diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/__init__.py b/parlai/crowdsourcing/tasks/qa_data_collection/__init__.py new file mode 100644 index 00000000000..240697e3247 --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/conf/example.yaml b/parlai/crowdsourcing/tasks/qa_data_collection/conf/example.yaml new file mode 100644 index 00000000000..4037c153714 --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/conf/example.yaml @@ -0,0 +1,17 @@ +#@package _global_ +mephisto: + blueprint: + world_file: ${task_dir}/worlds.py + task_description_file: ${task_dir}/task_description.html + custom_source_bundle: ${task_dir}/webapp/build/bundle.js + num_conversations: 1 + task: + task_name: parlai-qa-example + task_title: "Test ParlAI QA Data Collection Task" + task_description: > + This is a ParlAI data collection task. + task_reward: 0.3 + task_tags: "dynamic,question answering,testing" +teacher: + task: squad:SquadQATeacher + datatype: train diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/run.py b/parlai/crowdsourcing/tasks/qa_data_collection/run.py new file mode 100644 index 00000000000..64d5bc4bd2e --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/run.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import os
from dataclasses import dataclass, field
from itertools import chain
from typing import List, Any

import hydra
from omegaconf import DictConfig
from mephisto.abstractions.blueprints.parlai_chat.parlai_chat_blueprint import (
    BLUEPRINT_TYPE,
    SharedParlAITaskState,
)
from mephisto.operations.hydra_config import RunScriptConfig, register_script_config
from mephisto.operations.operator import Operator
from mephisto.tools.scripts import load_db_and_process_config

from parlai.agents.repeat_label.repeat_label import RepeatLabelAgent
from parlai.core.params import ParlaiParser
from parlai.core.worlds import create_task


# Directory containing this script; exposed to the config as ``task_dir``.
TASK_DIRECTORY = os.path.dirname(os.path.abspath(__file__))

# Hydra defaults list used by the script config below.
defaults = [
    {"mephisto/blueprint": BLUEPRINT_TYPE},
    {"mephisto/architect": "local"},
    {"mephisto/provider": "mock"},
    {"conf": "example"},
]


@dataclass
class TeacherConfig:
    """
    Options forwarded to the ParlAI parser to build the passage teacher.

    Each field is turned into a ``--<name> <value>`` command-line pair in
    ``main``.
    """

    task: str = field(default="squad:SquadQATeacher", metadata={"help": ""})
    datatype: str = field(default="train", metadata={"help": ""})


@dataclass
class TestScriptConfig(RunScriptConfig):
    """
    Script-level configuration registered under the name ``scriptconfig``.
    """

    defaults: List[Any] = field(default_factory=lambda: defaults)
    task_dir: str = TASK_DIRECTORY
    turn_timeout: int = field(
        default=300,
        metadata={
            "help": "Maximum response time before kicking "
            "a worker out, default 300 seconds"
        },
    )
    # A dataclass instance is unhashable, and Python 3.11+ refuses unhashable
    # field defaults at class-creation time ("mutable default ... use
    # default_factory"). A factory also gives every config its own instance.
    teacher: TeacherConfig = field(default_factory=TeacherConfig)


register_script_config(name="scriptconfig", module=TestScriptConfig)


@hydra.main(config_name="scriptconfig")
def main(cfg: DictConfig) -> None:
    """
    Build the teacher from ``cfg.teacher`` and launch the task through Mephisto.

    The teacher and the turn timeout are handed to the worlds through the
    shared task state; the same options dict is used for the task world and
    the onboarding world.

    Raises:
        AssertionError: if a custom source bundle is configured but the file
            does not exist.
    """
    db, cfg = load_db_and_process_config(cfg)

    # Flatten the teacher config into ``--key value`` pairs. Values are
    # coerced to ``str`` because argparse only accepts string arguments; for
    # the string-valued fields declared above this is a no-op.
    teacher_args = list(
        chain.from_iterable(('--' + k, str(v)) for k, v in cfg.teacher.items())
    )
    parser = ParlaiParser(True, False)
    opt = parser.parse_args(teacher_args)

    agent = RepeatLabelAgent(opt)
    teacher = create_task(opt, agent).get_task_agent()

    world_opt = {"turn_timeout": cfg.turn_timeout, "teacher": teacher}

    custom_bundle_path = cfg.mephisto.blueprint.get("custom_source_bundle", None)
    if custom_bundle_path is not None:
        assert os.path.exists(custom_bundle_path), (
            "Must build the custom bundle with `npm install; npm run dev` from within "
            f"the {TASK_DIRECTORY}/webapp directory in order to demo a custom bundle "
        )
        world_opt["send_task_data"] = True

    shared_state = SharedParlAITaskState(
        world_opt=world_opt, onboarding_world_opt=world_opt
    )

    operator = Operator(db)

    operator.validate_and_run_config(cfg.mephisto, shared_state)
    operator.wait_for_runs_then_shutdown(skip_input=True, log_rate=30)


if __name__ == "__main__":
    main()

# Next file in this patch (new file, index 00000000000..9a85fad4d20):
# parlai/crowdsourcing/tasks/qa_data_collection/task_description.html

If you see this, it means the task description has loaded successfully

+
+

What is this task?

+

+ In this task, you'll provide a question and the corresponding answer from the passage provided. +

+

+

Task Description Information!

+

+ Task Descriptions can take arbitrary HTML if you want... It can be great for + tasks that require simple formatting and descriptions, but if you want + to do anything more complex it's better to copy the + blueprints/parlai_chat/source directory, override relevant + components, build, and then use the --custom-source-bundle flag. +

diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/.babelrc b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/.babelrc new file mode 100644 index 00000000000..5507f2e868d --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/.babelrc @@ -0,0 +1,4 @@ +{ + "presets": ["@babel/env", "@babel/preset-react"], + "plugins": ["@babel/plugin-proposal-class-properties"] +} diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/package.json b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/package.json new file mode 100644 index 00000000000..f3cd904e5cd --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/package.json @@ -0,0 +1,45 @@ +{ + "name": "parlai-mturk-task-compiler", + "version": "1.0.0", + "description": "", + "main": "webpack.config.js", + "scripts": { + "dev": "webpack --mode development -q" + }, + "keywords": [], + "author": "", + "dependencies": { + "bootstrap": "^4.3.1", + "bootstrap-chat": "^1.0.7", + "mephisto-task": "^1.0.13", + "rc-slider": "^8.6.3", + "react": "16.13.1", + "react-bootstrap": "^0.32.4", + "react-dom": "16.13.1", + "react-table": "^6.8.6", + "react-fluid-textarea": "0.0.2" + }, + "devDependencies": { + "@babel/cli": "^7.1.0", + "@babel/core": "^7.1.0", + "@babel/plugin-proposal-class-properties": "^7.1.0", + "@babel/preset-env": "^7.1.0", + "@babel/preset-react": "^7.0.0", + "@typescript-eslint/eslint-plugin": "^2.34.0", + "@typescript-eslint/parser": "^2.34.0", + "babel-eslint": "^10.1.0", + "babel-loader": "^8.0.2", + "css-loader": "^1.0.0", + "eslint": "^6.8.0", + "eslint-config-react-app": "^5.2.1", + "eslint-plugin-flowtype": "^4.7.0", + "eslint-plugin-import": "^2.20.2", + "eslint-plugin-jsx-a11y": "^6.2.3", + "eslint-plugin-react": "^7.20.0", + "eslint-plugin-react-hooks": "^2.5.1", + "style-loader": "^0.23.0", + "url-loader": "^2.0.1", + "webpack": "^4.19.1", + "webpack-cli": "^3.1.1" + } +} diff --git 
a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/main.js b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/main.js new file mode 100644 index 00000000000..d1ce40f49be --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/main.js @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017-present, Facebook, Inc. + * All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +import React from "react"; +import ReactDOM from "react-dom"; +import "bootstrap-chat/styles.css"; +import ResizableTextArea from 'react-fluid-textarea'; + +import { ChatApp, ChatMessage, DefaultTaskDescription } from "bootstrap-chat"; + +function RenderChatMessage({ message, mephistoContext, appContext, idx }) { + const { agentId } = mephistoContext; + const { currentAgentNames } = appContext.taskContext; + + return ( + + ); +} + +function logSelection(event) { + const selection = event.target.value.substring(event.target.selectionStart, event.target.selectionEnd); + console.log(selection) +} + +function Passage({passage}) { + + // Formatting to make textarea look like div, span selection works best on textarea + const mystyle = { + outline: "none", + backgroundColor: "#dff0d8", + width: "100%", + border: "0px" + }; + if (passage) { + return (
+

Passage

+ +
) + } + return null +} + + +function MainApp() { + const [passage, setPassage] = React.useState(""); + + // Currently no way to display task description without changing Mephisto files + return ( + ( + + )} + renderSidePane={({ mephistoContext: { taskConfig } }) => ( + + + + )} + onMessagesChange={(messages) => { + if (messages.length > 0 && 'passage' in messages[messages.length - 1]) { + console.log("setting passage"); + setPassage(messages[messages.length - 1].passage) + } + }} + /> + ); +} + +ReactDOM.render(, document.getElementById("app")); diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/index.html b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/index.html new file mode 100644 index 00000000000..5a1a77bfb4b --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/index.html @@ -0,0 +1,32 @@ + + + + + + + + MTurk Chat + + + + + + + +
+ + + diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/notif.mp3 b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/notif.mp3 new file mode 100644 index 00000000000..baf6a4a4ada Binary files /dev/null and b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/src/static/notif.mp3 differ diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/webapp/webpack.config.js b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/webpack.config.js new file mode 100644 index 00000000000..0fd10cc913d --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/webapp/webpack.config.js @@ -0,0 +1,48 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +var path = require("path"); +var webpack = require("webpack"); + +module.exports = { + entry: "./src/main.js", + output: { + path: __dirname, + filename: "build/bundle.js", + }, + node: { + net: "empty", + dns: "empty", + }, + resolve: { + alias: { + react: path.resolve("./node_modules/react"), + "mephisto-task": path.resolve("./node_modules/mephisto-task"), + }, + }, + module: { + rules: [ + { + test: /\.(js|jsx)$/, + loader: "babel-loader", + exclude: /node_modules/, + options: { presets: ["@babel/env"] }, + }, + { + test: /\.css$/, + loader: "style-loader!css-loader", + }, + { + test: /\.(svg|png|jpe?g|ttf)$/, + loader: "url-loader?limit=100000", + }, + { + test: /\.jpg$/, + loader: "file-loader", + }, + ], + }, +}; diff --git a/parlai/crowdsourcing/tasks/qa_data_collection/worlds.py b/parlai/crowdsourcing/tasks/qa_data_collection/worlds.py new file mode 100644 index 00000000000..d5111f94279 --- /dev/null +++ b/parlai/crowdsourcing/tasks/qa_data_collection/worlds.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. 
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.crowdsourcing.utils.worlds import CrowdTaskWorld
from parlai.core.worlds import validate


class QADataCollectionWorld(CrowdTaskWorld):
    """
    Crowdsourcing world that collects one question and its answer for a passage.

    The passage is the ``text`` field of whatever the teacher supplied in
    ``opt["teacher"]`` returns from ``act()`` (e.g. a SQuAD passage).
    """

    collector_agent_id = 'QA Collector'

    def __init__(self, opt, agent):
        self.teacher = opt["teacher"]
        self.opt = opt
        self.agent = agent
        self.agent.agent_id = "QA Agent"
        # Conversation state, filled in turn by turn in parley().
        self.context = None
        self.question = None
        self.answer = None
        self.episodeDone = False

    def parley(self):
        """
        Run one turn: first ask for a question, on the next call for its answer.
        """
        prompt = {'episode_done': False, 'id': type(self).collector_agent_id}

        if not self.question:
            # First turn: the QA Collector shows the passage and asks the
            # turker for a question about it.
            self.context = self.teacher.act()['text']
            prompt['passage'] = self.context
            prompt['text'] = 'Please provide a question given the passage.'
            self.agent.observe(validate(prompt))
            # The turker's question could be logged here.
            self.question = self.agent.act(timeout=self.opt["turn_timeout"])
        elif not self.answer:
            # Second turn: the question is in hand, so ask for the answer and
            # finish the episode.
            prompt['text'] = 'Thanks. And what is the answer to your question?'
            self.agent.observe(validate(prompt))
            self.answer = self.agent.act(timeout=self.opt["turn_timeout"])
            self.episodeDone = True

    def episode_done(self):
        """
        Report whether both turns have been completed.
        """
        return self.episodeDone


def make_world(opt, agents):
    """
    Create the world around the first agent in ``agents``.
    """
    worker = agents[0]
    return QADataCollectionWorld(opt, worker)


def get_world_params():
    """
    Return the world parameters: this task uses a single agent.
    """
    return dict(agent_count=1)


# Patch to parlai/tasks/squad/agents.py (index ac8b57846af..1a42a49302e):
# the import block is regrouped and SquadQATeacher is appended to the module.

import copy
import json
import os

from parlai.core.message import Message
from parlai.core.opt import Opt
from parlai.core.teachers import FixedDialogTeacher, DialogTeacher, ParlAIDialogTeacher
from parlai.tasks.wrapper.agents import AbstractWrapperTeacher
from parlai.utils.io import PathManager
from .build import build


class SquadQATeacher(AbstractWrapperTeacher):
    """
    Wrapper teacher over SQuAD that keeps the passage and drops the question.
    """

    @classmethod
    def add_cmdline_args(cls, parser):
        parser.set_defaults(wrapper_task='squad')

    def __init__(self, opt: Opt, shared=None):
        super().__init__(opt, shared)

    def _edit_action(self, act: Message) -> Message:
        """
        Trim the message text to its first line.

        SQuAD returns the passage and the question together; only the passage
        (the text before the first newline) is required for this task.
        """
        first_line, _, _ = act['text'].partition('\n')
        act.force_set('text', first_line)
        return act