diff --git a/parlai/crowdsourcing/__init__.py b/parlai/crowdsourcing/__init__.py
new file mode 100644
index 00000000000..240697e3247
--- /dev/null
+++ b/parlai/crowdsourcing/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/parlai/crowdsourcing/tasks/__init__.py b/parlai/crowdsourcing/tasks/__init__.py
new file mode 100644
index 00000000000..240697e3247
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/parlai/crowdsourcing/tasks/acute_eval/README.md b/parlai/crowdsourcing/tasks/acute_eval/README.md
new file mode 100644
index 00000000000..86bf3f52ccb
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/acute_eval/README.md
@@ -0,0 +1,101 @@
+# ACUTE-Eval
+
+## Paper information
+
+Margaret Li, Jason Weston, Stephen Roller.
+_[ACUTE-EVAL: Improved Dialogue Evaluation with Optimized Questions and Multi-turn Comparisons](https://arxiv.org/abs/1909.03087)_.
+
+## Citation
+
+If you use this evaluation method in your own work, please cite with the
+following BibTeX entry:
+
+    @misc{li2019acuteeval,
+      title={ACUTE-EVAL: Improved Dialogue Evaluation with Optimized Questions and Multi-turn Comparisons},
+      author={Margaret Li and Jason Weston and Stephen Roller},
+      year={2019},
+      journal={Advances in Neural Information Processing Systems, Conversational AI Workshop},
+      url={https://arxiv.org/abs/1909.03087}
+    }
+
+# Code Instructions
+Once you have installed [ParlAI](https://github.com/facebookresearch/ParlAI/#installing-parlai) and [Mephisto](https://github.com/facebookresearch/mephisto/blob/master/docs/quickstart.md), follow the instructions below.
+
+The `example_script.py` script is designed to allow you to run this entire task from the command line with an invocation like
+
+    python parlai/crowdsourcing/tasks/acute_eval/example_script.py \
+    --pairings-filepath parlai/crowdsourcing/tasks/acute_eval/pairings.jsonl
+
+## Formatting conversation data
+
+This task code assumes that you've parsed and saved your collected conversations in a simple .jsonl format. The path to this file should be passed in as `--pairings-filepath`.
+
+This is a template of the expected format with the minimal expected fields:
+
+    {
+        "is_onboarding": false,
+        "speakers_to_eval": ["first_modelname", "second_modelname"],
+        "dialogue_ids": [dialogue_1_id, dialogue_2_id],
+        "dialogue_dicts": [
+            {
+                "speakers": ["first_modelname", "other_speaker"],
+                "dialogue": [
+                    {"id": "model1", "text": "Hi"},
+                    {"id": "other_speaker", "text": "Hi back"},
+                    ...
+                ]
+            },
+            {
+                "speakers": ["other_speaker", "second_modelname"],
+                "dialogue": [
+                    {"id": "model1", "text": "Hi"},
+                    {"id": "other_speaker", "text": "Hi back"},
+                    ...
+                ]
+            }
+        ]
+    }
+
+You can add an `"image_src"` key to an entry of `"dialogue"` to append an image to a chat message. The value of the key should be a serialized image, starting with a string such as `data:image/jpeg;base64,`.
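As a concrete illustration of this format, here is a minimal Python sketch that serializes one pairing as a line of a .jsonl file. The model names, utterances, and output filename are placeholders, not values the task expects:

    import json

    # Two conversations, one per model under evaluation. "dialogue" must
    # strictly alternate between the two speakers listed in "speakers".
    first_dialogue_dict = {
        "speakers": ["first_modelname", "other_speaker"],
        "dialogue": [
            {"id": "first_modelname", "text": "Hi"},
            {"id": "other_speaker", "text": "Hi back"},
        ],
    }
    second_dialogue_dict = {
        "speakers": ["other_speaker", "second_modelname"],
        "dialogue": [
            {"id": "other_speaker", "text": "Hello there"},
            {"id": "second_modelname", "text": "Hello to you too"},
        ],
    }

    # speakers_to_eval must be in the same order as dialogue_dicts.
    pairing = {
        "is_onboarding": False,
        "speakers_to_eval": ["first_modelname", "second_modelname"],
        "dialogue_ids": [0, 1],
        "dialogue_dicts": [first_dialogue_dict, second_dialogue_dict],
    }

    # Each line of the .jsonl file is one serialized pairing like this one.
    with open("my_pairings.jsonl", "w") as f:
        f.write(json.dumps(pairing) + "\n")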
+
+For onboarding tasks (tasks used to filter workers; see below for more details) you must additionally set a `correct_answer` field:
+
+    {
+        "is_onboarding": true,
+        "speakers_to_eval": ["first_modelname", "second_modelname"],
+        "correct_answer": "correct_modelname",
+        "dialogue_dicts": [
+            # as above
+        ]
+    }
+
+Note that we assume that "dialogue" consists of strictly alternating turns (e.g. speakers a, b, a, b, a...). Additionally, `speakers_to_eval` must be in the same order as the `dialogue_dicts`. See `pairings.jsonl` for examples of the format required.
+
+## Question phrasing
+
+In our paper, we address the problem of wording the questions and binary choices in order to elicit the highest-signal responses. The default question and choices correspond to our highest-signal 'engagingness' phrasing, but it's very easy to customize this by changing `extra_args['eval_question']`, `extra_args['s1_choice']`, and `extra_args['s2_choice']` in `example_script.py` (see the sketch at the end of this README). The special strings `<Speaker 1>` and `<Speaker 2>` are replaced when showing these questions to the user, and each speaker's utterances in the corresponding conversation will be colored to identify that speaker.
+
+## Onboarding tasks
+
+As discussed in the paper, we found that we had better annotation quality if we screened Turkers with an 'onboarding' comparison, consisting of a weak baseline conversation and a human-human conversation. Our code is set up so that this is optional.
+
+By default, `extra_args['block_on_onboarding_fail']` is set to `True`, which means that workers who fail onboarding will be soft-blocked. In other words, they won't be able to see or complete any more HITs from you, but they won't receive any notification that they've been blocked. The Mechanical Turk qualification name used to soft block must be set with `extra_args['block_qualification']`.
+
+By setting `extra_args['onboarding_threshold']`, you can also adjust the minimum proportion of onboarding tasks (if you have multiple) that must be answered correctly to pass onboarding.
+
+## Other settings
+
+### Task configuration on MTurk
+
+The title, description, and keywords of the task as shown on MTurk default to values in `ARG_STRING` in `example_script.py`. These values are used as follows:
+- `--task-title`: A short and descriptive title about the kind of task that the HIT contains. On the Amazon Mechanical Turk web site, the HIT title appears in search results and everywhere that the HIT is mentioned.
+- `--task-description`: Includes detailed information about the kind of task that the HIT contains. On the Amazon Mechanical Turk web site, the HIT description appears in the expanded view of search results, and in the HIT and assignment screens.
+- `--task-tags`: One or more words or phrases that describe the HIT, separated by commas. On the MTurk website, these words are used in searches to find HITs.
+- `--additional-task-description`: Additional text to show in the left-hand pane of the chat window.
+
+### CLI arguments
+
+A comprehensive list of settings specific to ACUTE-Eval can be found in `add_args_to_group()` in `acute_eval_blueprint.py`. For the arguments most likely to be useful for running ACUTE-Eval, see `example_script.py`.
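As referenced in the Question phrasing section above, here is a minimal sketch of overriding the phrasing defaults in `example_script.py`'s `extra_args` dict. The 'humanness'-style wording below is illustrative, not necessarily the paper's optimized phrasing:

    # Illustrative phrasing override; merge these keys into the extra_args
    # dict defined in example_script.py before launching the run.
    phrasing_args = {
        "eval_question": "Which speaker sounds more human?",
        "s1_choice": "<Speaker 1> sounds more human",
        "s2_choice": "<Speaker 2> sounds more human",
    }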
diff --git a/parlai/crowdsourcing/tasks/acute_eval/__init__.py b/parlai/crowdsourcing/tasks/acute_eval/__init__.py
new file mode 100644
index 00000000000..240697e3247
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/acute_eval/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/parlai/crowdsourcing/tasks/acute_eval/acute_eval_agent_state.py b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_agent_state.py
new file mode 100644
index 00000000000..df90f48c393
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_agent_state.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Dict, Any, TYPE_CHECKING
+from mephisto.server.blueprints.abstract.static_task.static_agent_state import (
+    StaticAgentState,
+)
+import time
+
+if TYPE_CHECKING:
+    from mephisto.data_model.packet import Packet
+
+
+DATA_FILE = "agent_data.json"
+
+
+class AcuteEvalAgentState(StaticAgentState):
+    """
+    Agent state for acute eval tasks.
+
+    Equivalent to StaticAgentState but doesn't have file IO.
+    """
+
+    def get_parsed_data(self) -> List[Dict[str, Any]]:
+        data = self.get_data()
+        assert data is not None, "Should only check parsed data for completed tasks"
+        response_list = []
+        inputs: List[Dict[str, Any]] = data["inputs"]
+        outputs = data["outputs"]
+        assert inputs is not None
+        assert outputs is not None
+        for idx in range(len(inputs)):
+            entry: Dict[str, Any] = {}
+            entry.update(inputs[idx])
+            entry.update(outputs["final_data"][idx])
+            response_list.append(entry)
+        return response_list
+
+    def update_data(self, packet: "Packet") -> None:
+        """
+        Process the incoming data packet, and handle updating the state.
+        """
+        assert (
+            packet.data.get("MEPHISTO_is_submit") is True
+        ), "Static tasks should only have final act"
+        self.state["times"]["task_end"] = time.time()
+        self.state["outputs"] = packet.data["task_data"]
+        self.save_data()
diff --git a/parlai/crowdsourcing/tasks/acute_eval/acute_eval_blueprint.py b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_blueprint.py
new file mode 100644
index 00000000000..0d2bf780a11
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_blueprint.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from mephisto.data_model.blueprint import Blueprint
+from mephisto.data_model.assignment import InitializationData
+from parlai.crowdsourcing.tasks.acute_eval.acute_eval_agent_state import (
+    AcuteEvalAgentState,
+)
+from parlai.crowdsourcing.tasks.acute_eval.acute_eval_runner import AcuteEvalRunner
+from parlai.crowdsourcing.tasks.acute_eval.acute_eval_builder import AcuteEvalBuilder
+from mephisto.core.registry import register_mephisto_abstraction
+
+import os
+import math
+
+from typing import ClassVar, List, Type, Any, Dict, Iterable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mephisto.data_model.blueprint import AgentState, TaskRunner, TaskBuilder
+    from argparse import _ArgumentGroup as ArgumentGroup
+
+BLUEPRINT_TYPE = "acute_eval"
+
+
+# WISH AcuteEval's blueprint can probably be extended to compare more than just convos
+@register_mephisto_abstraction()
+class AcuteEvalBlueprint(Blueprint):
+    """
+    Blueprint for a task that asks humans to compare conversational outputs.
+ """ + + AgentStateClass: ClassVar[Type["AgentState"]] = AcuteEvalAgentState + TaskBuilderClass: ClassVar[Type["TaskBuilder"]] = AcuteEvalBuilder + TaskRunnerClass: ClassVar[Type["TaskRunner"]] = AcuteEvalRunner + supported_architects: ClassVar[List[str]] = ["mock"] # TODO update + BLUEPRINT_TYPE = BLUEPRINT_TYPE + + @classmethod + def assert_task_args(cls, opts: Any) -> None: + """ + Ensure that the data can be properly loaded. + """ + if opts.get("pairings_filepath") is not None: + pairings_filepath = os.path.expanduser(opts["pairings_filepath"]) + assert os.path.exists( + pairings_filepath + ), f"Provided file {pairings_filepath} doesn't exist" + elif opts.get("pairings_task_data") is not None: + assert ( + len(opts.get("pairings_task_data")) > 0 + ), "Length of data dict provided was 0" + else: + raise AssertionError( + "Must provide one of a data csv, json, or a list of tasks" + ) + + if opts.get("block_on_onboarding_fail") is True: + if opts.get("block_qualification") is None: + raise AssertionError( + "Must provide `block_qualification` to use `block_on_onboarding_fail`" + ) + + @classmethod + def add_args_to_group(cls, group: "ArgumentGroup") -> None: + """ + Adds required options for AcuteEvalBlueprints. + + task_source points to the file intending to be deployed for this task + pairings_filepath has the data to be deployed for this task. + """ + super(AcuteEvalBlueprint, cls).add_args_to_group(group) + + group.description = """ + AcuteEvalBlueprint: Tasks launched from acute eval blueprints + require sets of pairings for workers to be able to compare to. + + These pairings can be provided as a csv or by passing a + pairings_task_data dict into extra_args. + """ + group.add_argument( + "--annotations-per-pair", + dest="annotations_per_pair", + type=int, + default=1, + help="Number of annotations per conversation comparison pair", + ) + group.add_argument( + "--pairings-filepath", + dest="pairings_filepath", + type=str, + default=None, + help="path to the file containing the task dictionaries", + ) + group.add_argument( + "--s1-choice", + dest="s1_choice", + type=str, + default="I would prefer to talk to ", + help="text next to speaker 1 radio button", + ) + group.add_argument( + "--s2-choice", + dest="s2_choice", + type=str, + default="I would prefer to talk to ", + help="text next to speaker 2 radio button", + ) + group.add_argument( + "--eval-question", + dest="eval_question", + type=str, + default="Who would you prefer to talk to for a long conversation?", + help='question to present to turker for comparison (e.g. 
"Which speaker is better?")', + ) + group.add_argument( + "--block-on-onboarding-fail", + dest="block_on_onboarding_fail", + type=bool, + default=True, + help="whether to block on onboarding failure", + ) + group.add_argument( + "--subtasks-per-unit", + dest="subtasks_per_unit", + type=int, + default=5, + help="number of subtasks/comparisons to do per unit", + ) + group.add_argument( + "--onboarding-threshold", + dest="onboarding_threshold", + type=float, + default=0.75, + help="minimum accuracy on onboarding tasks, as a float 0-1.0", + ) + group.add_argument( + "--random-seed", + dest="random_seed", + type=int, + default=42, + help="seed for random", + ) + group.add_argument( + "--additional-task-description", + dest="additional_task_description", + type=str, + default='', + help="Additional text to show on the left pane", + ) + return + + def get_frontend_args(self) -> Dict[str, Any]: + """ + Specifies what options within a task_config should be forwarded to the client + for use by the task's frontend. + """ + return { + "task_description": "Placeholder Task Description - Javascript failed to load", + "frame_height": 650, + "num_subtasks": self.opts["subtasks_per_unit"], + "question": self.opts["eval_question"], + "block_mobile": True, + "get_task_feedback": False, # TODO(#95) make option + "additional_task_description": self.opts['additional_task_description'], + } + + def get_initialization_data(self) -> Iterable["InitializationData"]: + """ + Return the InitializationData retrieved from the specified stream. + """ + # TODO(#99) once we can release HITs over time, configure this to + # release as many as needed thusfar and top off when + # onboardings fail + print(self.opts) + num_conversations = math.ceil( + self.opts.get("num_matchup_pairs", 8) + / max((self.opts["subtasks_per_unit"] - 1), 1) + ) # release enough hits to finish all annotations requested + return [ + InitializationData(shared={}, unit_data=[{}]) + for d in range(num_conversations) + ] diff --git a/parlai/crowdsourcing/tasks/acute_eval/acute_eval_builder.py b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_builder.py new file mode 100644 index 00000000000..b17e0671dfe --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_builder.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from mephisto.data_model.blueprint import TaskBuilder + +import os +import shutil +import subprocess + +ACUTE_TASK_DIR = os.path.dirname(__file__) +FRONTEND_SOURCE_DIR = os.path.join(ACUTE_TASK_DIR, "webapp") +FRONTEND_BUILD_DIR = os.path.join(FRONTEND_SOURCE_DIR, "build") + + +class AcuteEvalBuilder(TaskBuilder): + """ + Builder for a static task, pulls the appropriate html, builds the frontend (if a + build doesn't already exist), then puts the file into the server directory. + """ + + BUILT_FILE = "done.built" + BUILT_MESSAGE = "built!" + + def rebuild_core(self): + """ + Rebuild the frontend for this task. + """ + return_dir = os.getcwd() + os.chdir(FRONTEND_SOURCE_DIR) + if os.path.exists(FRONTEND_BUILD_DIR): + shutil.rmtree(FRONTEND_BUILD_DIR) + packages_installed = subprocess.call(["npm", "install"]) + if packages_installed != 0: + raise Exception( + "please make sure npm is installed, otherwise view " + "the above error for more info." 
+ ) + webpack_complete = subprocess.call(["npm", "run", "dev"]) + if webpack_complete != 0: + raise Exception( + "Webpack appears to have failed to build your " + "frontend. See the above error for more information." + ) + os.chdir(return_dir) + + def build_in_dir(self, build_dir: str): + """ + Build the frontend if it doesn't exist, then copy into the server directory. + """ + # Only build this task if it hasn't already been built + if True: # not os.path.exists(FRONTEND_BUILD_DIR): + self.rebuild_core() + + # Copy the built core and the given task file to the target path + bundle_js_file = os.path.join(FRONTEND_BUILD_DIR, "bundle.js") + target_resource_dir = os.path.join(build_dir, "static") + target_path = os.path.join(target_resource_dir, "bundle.js") + shutil.copy2(bundle_js_file, target_path) + + copied_static_file = os.path.join( + FRONTEND_SOURCE_DIR, "src", "static", "index.html" + ) + target_path = os.path.join(target_resource_dir, "index.html") + shutil.copy2(copied_static_file, target_path) + + # Write a built file confirmation + with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file: + built_file.write(self.BUILT_MESSAGE) + + # TODO(#97) update test validation + @staticmethod + def task_dir_is_valid(task_dir: str) -> bool: + """ + Acute eval is always valid, we don't have any special resources. + """ + return True diff --git a/parlai/crowdsourcing/tasks/acute_eval/acute_eval_runner.py b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_runner.py new file mode 100644 index 00000000000..72bfc2bdb65 --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/acute_eval_runner.py @@ -0,0 +1,458 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from mephisto.data_model.blueprint import TaskRunner + +import random +import queue +import json + +from typing import List, Any, Dict, Tuple, Set, TYPE_CHECKING + +if TYPE_CHECKING: + from mephisto.data_model.task import TaskRun + from mephisto.data_model.assignment import Unit + from mephisto.data_model.agent import Agent +from mephisto.core.logger_core import get_logger + +logger = get_logger(name=__name__, verbose=True, level="info") + +DEFAULT_TASK_CONFIG = { + "hit_title": "Which Conversational Partner is Better?", + "hit_description": "Evaluate quality of conversations through comparison.", + "hit_keywords": "chat,evaluation,comparison,conversation", +} + + +PairingsDict = Dict[str, Any] +WorkerID = str +UnitID = str + + +# TODO(#99) ask the run to enqueue new tasks when running out and still +# unfinished tasks remain. +class AcuteEvalRunner(TaskRunner): + """ + Managing class for the acute evaluator process. + + Relevant args are parsed in the `setup_args` function above. + """ + + def __init__(self, task_run: "TaskRun", opts: Any): + """ + Initialize the AcuteEvaluator. + + The following object attributes are used in running ACUTE Eval: + + ``onboarding_tasks``: A list of ALL available _onboarding_ comparison tasks + + ``desired_tasks``: A list of ALL available comparison tasks + + ``task_queue``: A queue of REMAINING tasks, from which HITs are constructed. 
+ + ``worker_data``: A mapping from worker ID to data about the worker, including + their tasks completed, conversations seen, and onboarding todo + + ``failed_onboard``: The set of workers who have failed onboarding + + ``unit_agent_map``: Map from unit id to the worker_id and task data for cleanup + """ + super().__init__(task_run, opts) + random.seed(opts["random_seed"]) + self.is_concurrent = False + self.assignment_duration_in_seconds = ( + task_run.get_task_config().assignment_duration_in_seconds + ) + + # class attributes + self.onboarding_tasks: List[Dict] = [] + self.desired_tasks: List[Dict] = [] + self.task_queue: queue.Queue = queue.Queue() + self.worker_data: Dict[WorkerID, Dict[str, List]] = {} + self.failed_onboard: Set = set() + self.unit_agent_map: Dict[UnitID, Tuple[WorkerID, List[PairingsDict]]] = {} + + # read in conversations data + self._load_conversation_data() + + # setup the task queue + self._setup_task_queue() + + def _get_worker_data(self, worker_id: str) -> Dict[str, List]: + """ + Return worker data if present, else a default dict. + """ + onboarding_todo = list(range(len(self.onboarding_tasks))) + random.shuffle(onboarding_todo) + self.worker_data[worker_id] = self.worker_data.get( + worker_id, + { + "tasks_completed": [], + "conversations_seen": [], + "onboarding_todo": onboarding_todo, + }, + ) + return self.worker_data[worker_id] + + def set_block_qual(self, task_id: str): + """ + Set block qualification if necessary. + + :param task_id: + task id used to set block qualification, if necessary. + """ + if self.opts["block_on_onboarding_fail"]: + self.block_qualification = self.opts["block_qualification"] + if self.block_qualification is None: + self.block_qualification = f"{task_id}_failed_onboarding" + self.opts["block_qualification"] = self.block_qualification + logger.warning( + "No block_qualification set in opt, automatically creating " + "new qualification {}".format(self.block_qualification) + ) + found_qualifications = self.task_run.db.find_qualifications( + self.block_qualification + ) + if len(found_qualifications) == 0: + self.task_run.db.make_qualification(self.block_qualification) + + def _load_conversation_data(self): + """ + Load conversation data. + + Loads in the data from the pairs filepath. 
+ """ + preset_pairs = self.opts.get("pairings_task_data") + if preset_pairs is not None: + self.onboarding_tasks = preset_pairs["onboarding"] + self.desired_tasks = preset_pairs["desired"] + return + + pairs_path = self.opts.get("pairings_filepath") + + with open(pairs_path) as pf: + for i, l in enumerate(pf.readlines()): + convo_pair = json.loads(l.strip()) + eval_speakers = [ + s + for d in convo_pair["dialogue_dicts"] + for s in d["speakers"] + if s in convo_pair["speakers_to_eval"] + ] + # make sure order is preserved + assert eval_speakers == convo_pair["speakers_to_eval"] + model_left_idx = random.choice([0, 1]) + task = { + "task_specs": { + "s1_choice": self.opts["s1_choice"], + "s2_choice": self.opts["s2_choice"], + "question": self.opts["eval_question"], + "is_onboarding": convo_pair["is_onboarding"], + "model_left": { + "name": eval_speakers[model_left_idx], + "dialogue": convo_pair["dialogue_dicts"][model_left_idx][ + "dialogue" + ], + }, + "model_right": { + "name": eval_speakers[1 - model_left_idx], + "dialogue": convo_pair["dialogue_dicts"][ + 1 - model_left_idx + ]["dialogue"], + }, + }, + "pairing_dict": convo_pair, + "pair_id": i, + } + if convo_pair.get("is_onboarding"): + self.onboarding_tasks.append(task) + else: + self.desired_tasks.append(task) + + def _setup_task_queue(self): + """ + Fill task queue with conversation pairs. + """ + for _i in range(self.opts["annotations_per_pair"]): + all_task_keys = list(range(len(self.desired_tasks))) + random.shuffle(all_task_keys) + for p_id in all_task_keys: + self.task_queue.put(self.desired_tasks[p_id]) + + def _get_dialogue_ids(self, task: Dict[str, Any]) -> List[int]: + """ + Return the ids for the dialogues corresponding to a given task. + + :return dialogue_ids: + A list of two ids which correspond to the id for each conversation + """ + return task["pairing_dict"]["dialogue_ids"] + + def _poll_task_queue( + self, worker_id: str, task_data: List[Dict[str, Any]] + ) -> List[PairingsDict]: + """ + Poll task queue for tasks for a worker. + + :param worker_id: + id for worker + + :param task_data: + list of potential tasks already for worker + + :return task_data: + a list of tasks for a worker to complete + """ + worker_data = self._get_worker_data(worker_id) + num_attempts = 0 + while (not self.task_queue.empty()) and num_attempts < self.task_queue.qsize(): + try: + next_task = self.task_queue.get() + except queue.Empty: + break + num_attempts += 1 + + pair_id = next_task["pair_id"] + dialogue_ids = self._get_dialogue_ids(next_task) + + # make sure worker has not seen these conversations before + if pair_id not in worker_data["tasks_completed"] and all( + d_id not in worker_data["conversations_seen"] for d_id in dialogue_ids + ): + # track tasks and conversations seen + worker_data["tasks_completed"].append(pair_id) + worker_data["conversations_seen"].extend(dialogue_ids) + task_data.append(next_task) + if len(task_data) == self.opts["subtasks_per_unit"]: + return task_data + else: + self.task_queue.put(next_task) + + return task_data + + def _top_up_task_data( + self, worker_id: str, task_data: List[Dict[str, Any]] + ) -> List[PairingsDict]: + """ + Top up worker task data. + + This function is called if ``self.task_queue`` is exhausted but + task_data for the worker is less than the `tasks_per_unit`. + + Make sure that all added tasks have not been seen by the worker. 
+ + :param worker_id: + id for worker + + :param task_data: + list of potential tasks already for worker + + :return task_data: + a list of tasks for a worker to complete + """ + worker_data = self._get_worker_data(worker_id) + tasks_still_needed = self.opts["subtasks_per_unit"] - len(task_data) + tasks_remaining = [ + t_id + for t_id in range(len(self.desired_tasks)) + if t_id not in worker_data["tasks_completed"] + ] + # get any pairings with conversations this worker has not seen to fill this hit + additional_tasks = [ + t + for t in tasks_remaining + if all( + d_id not in worker_data["conversations_seen"] + for d_id in self._get_dialogue_ids(self.desired_tasks[t]) + ) + ] + if tasks_still_needed < len(additional_tasks): + additional_tasks = random.sample(additional_tasks, tasks_still_needed) + worker_data["tasks_completed"].extend(additional_tasks) + + for t in additional_tasks: + worker_data["conversations_seen"].extend( + self._get_dialogue_ids(self.desired_tasks[t]) + ) + task_data.extend(self.desired_tasks[t]) + + return task_data + + def get_new_task_data(self, worker_id: str) -> List[PairingsDict]: + """ + Get next task for worker. + + Returns the next onboarding task if worker hasn't finished them all, + Otherwise finds a task from the queue they haven't seen + + If they've seen everything in the queue, spin up an + extra task (one that was in the queue and is now saturated) + + :param worker_id: + worker id + + :return task_data: + A list of tasks for the worker to complete + """ + tasks_per_unit = self.opts["subtasks_per_unit"] + # first add onboarding tasks + task_data = self.get_onboarding_tasks(worker_id) + logger.debug(f"Onboarding task data gotten: {len(task_data)}") + if len(task_data) == tasks_per_unit: + return task_data + + # poll the task queue for more tasks + task_data = self._poll_task_queue(worker_id, task_data) + logger.debug(f"Task queue data gotten: {len(task_data)}") + if len(task_data) == tasks_per_unit: + return task_data + + # top up the task_data if we don't hit the desired tasks_per_unit + task_data = self._top_up_task_data(worker_id, task_data) + logger.debug(f"Topped off data gotten: {len(task_data)}") + return task_data + + def requeue_task_data(self, worker_id: str, task_data: List[PairingsDict]): + """ + Return task to task_queue. + + If the task is an onboarding task, indicate that the worker has + another onboarding task to do. + + :param worker_id: + worker id of worker who is returning task + + :param task_data: + list of unfinished tasks to return to the queue. + """ + worker_data = self._get_worker_data(worker_id) + for subtask_data in task_data: + if subtask_data["task_specs"].get("is_onboarding", False): + worker_data["onboarding_todo"].append(subtask_data["pair_id"]) + else: + self.task_queue.put(subtask_data) + try: + worker_data["tasks_completed"].remove(subtask_data["pair_id"]) + for d_id in self._get_dialogue_ids(subtask_data): + worker_data["conversations_seen"].remove(d_id) + except ValueError: + # Task may have shown up in worker's task queue twice + # due to some unfortunate race condition + logger.exception( + f"could not remove task from worker {worker_id} history", + exc_info=True, + ) + + def get_onboarding_tasks(self, worker_id: str) -> List[PairingsDict]: + """ + Get next onboarding task for given worker. 
+ + :param worker_id: + worker id + + :return: + A list of onboarding tasks for the worker + """ + if len(self.onboarding_tasks) == 0: + return [] + + worker_data = self._get_worker_data(worker_id) + onboarding_todo = worker_data["onboarding_todo"] + if not onboarding_todo: + # worker has completed all required onboarding tasks + return [] + # get onboarding tasks for workers needing them + num_tasks_to_return = min(len(onboarding_todo), self.opts["subtasks_per_unit"]) + onboarding_tasks_chosen = onboarding_todo[:num_tasks_to_return] + worker_data["onboarding_todo"] = onboarding_todo[num_tasks_to_return:] + return [self.onboarding_tasks[t_id] for t_id in onboarding_tasks_chosen] + + def check_and_update_worker_approval(self, agent: "Agent"): + """ + Soft block workers who fail onboarding tasks, keep track of their status. + + :param agent: + Agent that the worker completed the task with. + + :param save_data: + data from the worker's completed tasks + """ + worker = agent.get_worker() + worker_id = worker.db_id + save_data = agent.state.get_data() + all_task_data = save_data["inputs"] + response_data = save_data["outputs"]["final_data"] + num_onboarding_tasks = 0 + num_correct = 0 + + for i in range(len(all_task_data)): + is_onboarding = all_task_data[i]["pairing_dict"].get("is_onboarding", False) + if not is_onboarding: + # not an onboarding task, no need to check correctness + continue + worker_response = response_data[i]["speakerChoice"] + expected_response = all_task_data[i]["pairing_dict"]["correct_answer"] + num_onboarding_tasks += 1 + if worker_response == expected_response: + # count correct answers + num_correct += 1 + if num_onboarding_tasks == 0: + # no onboarding tasks found + if worker_id in self.failed_onboard: + # worker already failed onboarding, add pairings back to queue + self.requeue_task_data(worker_id, all_task_data) + return + if (num_correct / num_onboarding_tasks) >= self.opts["onboarding_threshold"]: + # worker passed onboarding + return + # worker failed onboarding, soft block and record + assert ( + self.block_qualification is not None + ), "Should not be blocking without a block qualification set" + worker.grant_qualification(self.block_qualification, 1) + self.failed_onboard.add(worker_id) + + def get_init_data_for_agent(self, agent: "Agent") -> List[PairingsDict]: + """ + Return the data for an agent already assigned to a particular unit. + """ + init_state = agent.state.get_init_state() + if init_state is not None: + # reconnecting agent, give what we've got + return init_state + else: + worker = agent.get_worker() + task_data = self.get_new_task_data(worker.db_id) + agent.state.set_init_state(task_data) + self.unit_agent_map[agent.get_unit().db_id] = (worker.db_id, task_data) + return task_data + + def run_unit(self, unit: "Unit", agent: "Agent") -> None: + """ + Static runners will get the task data, send it to the user, then wait for the + agent to act (the data to be completed) + """ + # Frontend implicitly asks for the initialization data, so we just need + # to wait for a response + _ = agent.act(timeout=self.assignment_duration_in_seconds) + if self.opts["block_on_onboarding_fail"]: + # check whether workers failed onboarding + self.check_and_update_worker_approval(agent) + logger.info(f"Acute eval done for {agent}") + + def cleanup_unit(self, unit: "Unit") -> None: + """ + An incomplete task needs to have the contents of that task requeued into the + overall task queue. 
+ """ + logger.info(f"Cleaning up unit {unit.db_id}") + if unit.db_id not in self.unit_agent_map: + return logger.warn( + f"Unit {unit.db_id} already appears to have been cleaned up" + ) + worker_id, task_data = self.unit_agent_map[unit.db_id] + del self.unit_agent_map[unit.db_id] + self.requeue_task_data(worker_id, task_data) diff --git a/parlai/crowdsourcing/tasks/acute_eval/example_script.py b/parlai/crowdsourcing/tasks/acute_eval/example_script.py new file mode 100644 index 00000000000..6dff81ca73a --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/example_script.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import os +import time +import shlex +from mephisto.core.operator import Operator +from parlai.crowdsourcing.tasks.acute_eval.acute_eval_blueprint import BLUEPRINT_TYPE +from mephisto.utils.scripts import MephistoRunScriptParser + +""" +Example script for running ACUTE-EVAL. +The only argument that *must* be modified for this to be run is: +``pairings_filepath``: Path to pairings file in the format specified in the README.md + +The following args are useful to tweak to fit your specific needs; + - ``annotations_per_pair``: A useful arg if you'd like to evaluate a given conversation pair + more than once. + - ``num_matchup_pairs``: Essentially, how many pairs of conversations you would like to evaluate + - ``subtasks_per_unit``: How many comparisons you'd like a turker to complete in one HIT + +""" + +TASK_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) + +parser = MephistoRunScriptParser() +parser.add_argument( + "-pfp", + "--pairings-filepath", + default=f"{TASK_DIRECTORY}/pairings.jsonl", + help="Path to pairings file", +) +parser.add_argument( + "-app", + "--annotations-per-pair", + default=1, + help="Annotations per pairing, to ensure worker agreement, default 1", + type=int, +) +parser.add_argument( + "-nmp", + "--num-matchup-pairs", + default=2, + help="Number of pairs per model matchup, default 2", + type=int, +) +parser.add_argument( + "-spu", + "--subtasks-per-unit", + default=5, + help="Number of conversations to evaluate per task, default 5", + type=int, +) + +architect_type, requester_name, db, args = parser.parse_launch_arguments() + +USE_LOCAL = True + +task_title = "Which Conversational Partner is Better?" +task_description = "Evaluate quality of conversations through comparison." 
+hit_keywords = "chat,evaluation,comparison,conversation" + +ARG_STRING = ( + f"--blueprint-type {BLUEPRINT_TYPE} " + f"--architect-type {architect_type} " + f"--requester-name {requester_name} " + f'--task-title "\\"{task_title}\\"" ' + f'--task-description "\\"{task_description}\\"" ' + "--task-reward 0.5 " + f"--task-tags {hit_keywords} " + f"--maximum-units-per-worker 0 " # Num of units a worker is allowed to do, 0 is infinite + f"--allowed-concurrent 1 " # Workers can only do one task at a time, or onboarding may break +) + +extra_args = { + "pairings_filepath": args['pairings_filepath'], + "block_on_onboarding_fail": True, + "block_qualification": f"acute_eval_{int(time.time())}_block", + # num times to use the same conversation pair + "annotations_per_pair": args["annotations_per_pair"], + "random_seed": 42, # random seed + "subtasks_per_unit": args[ + "subtasks_per_unit" + ], # num comparisons to show within one unit + "num_matchup_pairs": args[ + "num_matchup_pairs" + ], # num pairs of conversations to be compared + # question phrasing + "s1_choice": "I would prefer to talk to ", + "s2_choice": "I would prefer to talk to ", + "eval_question": "Who would you prefer to talk to for a long conversation?", + "assignment_duration_in_seconds": 600, +} + +operator = Operator(db) +operator.parse_and_launch_run_wrapper(shlex.split(ARG_STRING), extra_args=extra_args) +operator.wait_for_runs_then_shutdown(skip_input=True, log_rate=30) diff --git a/parlai/crowdsourcing/tasks/acute_eval/pairings.jsonl b/parlai/crowdsourcing/tasks/acute_eval/pairings.jsonl new file mode 100644 index 00000000000..6f504aafdd1 --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/pairings.jsonl @@ -0,0 +1,2 @@ +{"is_onboarding": true, "speakers_to_eval": ["modela", "modelc"], "correct_answer": "modela", "tags": ["onboarding1"], "dialogue_dicts": [{"speakers": ["modela", "human_evaluator"], "id": "ABCDEF", "evaluator_id_hashed": "HUMAN1", "oz_id_hashed": null, "dialogue": [{"id": "modela", "text": "Hello how are you?"}, {"id": "human_evaluator", "text": "I'm well, how about yourself?"}, {"id": "modela", "text": "Good, just reading a book."}, {"id": "human_evaluator", "text": "What book are you reading?"}, {"id": "modela", "text": "An English textbook. Do you like to read?"}, {"id": "human_evaluator", "text": "Yes, I really enjoy reading, but my favorite thing to do is dog walking."}, {"id": "modela", "text": "Do you have a dog? I don't have any pets"}, {"id": "human_evaluator", "text": "Yes, I have a labrador poodle mix."}]}, {"speakers": ["modelc", "human_evaluator"], "id": "ZYX", "evaluator_id_hashed": "HUMAN3", "oz_id_hashed": null, "dialogue": [{"id": "modelc", "text": "Hello hello hello"}, {"id": "human_evaluator", "text": "How are you?"}, {"id": "modelc", "text": "Hello hello hello"}, {"id": "human_evaluator", "text": "Hello back"}, {"id": "modelc", "text": "Hello hello hello"}, {"id": "human_evaluator", "text": "You must really like that word"}, {"id": "modelc", "text": "Hello hello hello"}, {"id": "human_evaluator", "text": "Ok"}]}]} +{"is_onboarding": false, "speakers_to_eval": ["modelb", "modela"], "tags": ["example1"], "dialogue_ids": [0, 1], "dialogue_dicts": [{"speakers": ["modelb", "human_evaluator"], "id": "AGHIJK", "evaluator_id_hashed": "HUMAN2", "oz_id_hashed": null, "dialogue": [{"id": "human_evaluator", "text": "Hi, I love food, what about you?"}, {"id": "modelb", "text": "I love food too, what's your favorite? 
Mine is burgers."}, {"id": "human_evaluator", "text": "I'm a chef and I love all foods. What do you do?"}, {"id": "modelb", "text": "I'm retired now, but I was a nurse."}, {"id": "human_evaluator", "text": "Wow, that's really admirable. My sister is a nurse."}, {"id": "modelb", "text": "Do you have any hobbies?"}, {"id": "human_evaluator", "text": "I like to paint and play piano"}, {"id": "modelb", "text": "You're very artistic. I wish I could be so creative."}]}, {"speakers": ["modela", "human_evaluator"], "id": "123456", "evaluator_id_hashed": "HUMAN1", "oz_id_hashed": null, "dialogue": [{"id": "modela", "text": "Hi how are you doing?"}, {"id": "human_evaluator", "text": "I'm doing ok."}, {"id": "modela", "text": "Oh, what's wrong?"}, {"id": "human_evaluator", "text": "Feeling a bit sick after my workout"}, {"id": "modela", "text": "Do you workout a lot?"}, {"id": "human_evaluator", "text": "Yes, I go to the gym every day. I do a lot of lifting."}, {"id": "modela", "text": "That's cool, I like to climb."}, {"id": "human_evaluator", "text": "I've never been."}]}]} diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/.babelrc b/parlai/crowdsourcing/tasks/acute_eval/webapp/.babelrc new file mode 100644 index 00000000000..5507f2e868d --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/.babelrc @@ -0,0 +1,4 @@ +{ + "presets": ["@babel/env", "@babel/preset-react"], + "plugins": ["@babel/plugin-proposal-class-properties"] +} diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/package.json b/parlai/crowdsourcing/tasks/acute_eval/webapp/package.json new file mode 100644 index 00000000000..2188a598ef0 --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/package.json @@ -0,0 +1,34 @@ +{ + "name": "parlai-mturk-task-compiler", + "version": "1.0.0", + "description": "", + "main": "webpack.config.js", + "scripts": { + "dev": "webpack --mode development -q" + }, + "keywords": [], + "author": "", + "dependencies": { + "bootstrap": "^4.3.1", + "jquery": "^3.0.0", + "popper.js": "^1.14.4", + "mephisto-task": "^1.0.10", + "react": "16.13.1", + "react-bootstrap": "^0.32.4", + "react-dom": "16.13.1", + "react-table": "^6.8.6" + }, + "devDependencies": { + "@babel/cli": "^7.1.0", + "@babel/core": "^7.1.0", + "@babel/plugin-proposal-class-properties": "^7.1.0", + "@babel/preset-env": "^7.1.0", + "@babel/preset-react": "^7.0.0", + "babel-loader": "^8.0.2", + "css-loader": "^1.0.0", + "style-loader": "^0.23.0", + "url-loader": "^2.0.1", + "webpack": "^4.19.1", + "webpack-cli": "^3.1.1" + } +} diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/src/app.jsx b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/app.jsx new file mode 100644 index 00000000000..53ad0159aeb --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/app.jsx @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017-present, Facebook, Inc. + * All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */
+
+import React from "react";
+import ReactDOM from "react-dom";
+import {
+  TaskDescription,
+  BaseFrontend,
+} from "./components/core_components.jsx";
+import { useMephistoTask, getBlockedExplanation } from "mephisto-task";
+
+/* ================= Application Components ================= */
+
+function MainApp() {
+  const {
+    blockedReason,
+    taskConfig,
+    isPreview,
+    isLoading,
+    initialTaskData,
+    handleSubmit,
+  } = useMephistoTask();
+
+  if (blockedReason !== null) {
+    return <h1>{getBlockedExplanation(blockedReason)}</h1>;
+  }
+  if (isPreview) {
+    return <TaskDescription task_config={taskConfig} is_cover_page={true} />;
+  }
+  if (isLoading) {
+    return <div>Initializing...</div>;
+  }
+  if (initialTaskData === null) {
+    return <h1>Gathering data...</h1>;
+  }
+
+  return (
+    <div style={{ margin: 0, padding: 0, height: "100%" }}>
+      <BaseFrontend
+        task_data={initialTaskData}
+        task_config={taskConfig}
+        onSubmit={handleSubmit}
+      />
+    </div>
+  );
+}
+
+ReactDOM.render(<MainApp />, document.getElementById("app"));
diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/src/components/core_components.jsx b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/components/core_components.jsx
new file mode 100644
index 00000000000..59de78ae902
--- /dev/null
+++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/components/core_components.jsx
@@ -0,0 +1,774 @@
+/*
+ * Copyright (c) 2017-present, Facebook, Inc.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+import React from "react";
+import {
+  Button,
+  Col,
+  ControlLabel,
+  Form,
+  FormControl,
+  FormGroup,
+  Grid,
+  Radio,
+  Row,
+} from "react-bootstrap";
+import $ from "jquery";
+
+// blue
+const speaker1_color = "#29BFFF";
+// purple
+const speaker2_color = "#492FED";
+// grey
+const otherspeaker_color = "#eee";
+
+const speaker1_style = {
+  borderRadius: 3,
+  padding: "1px 4px",
+  display: "inline-block",
+  backgroundColor: speaker1_color,
+  color: "white",
+};
+const speaker2_style = {
+  borderRadius: 3,
+  padding: "1px 4px",
+  display: "inline-block",
+  backgroundColor: speaker2_color,
+  color: "white",
+};
+const otherspeaker_style = {
+  borderRadius: 3,
+  padding: "1px 4px",
+  display: "inline-block",
+  backgroundColor: otherspeaker_color,
+};
+
+function ChatMessage({ message, model, is_primary_speaker, image_src }) {
+  let primary_speaker_color =
+    model === "model_left" ? speaker1_color : speaker2_color;
+  let message_container_style = {
+    display: "block",
+    width: "100%",
+    ...{
+      float: is_primary_speaker ? "left" : "right",
+    },
+  };
+  let message_style = {
+    borderRadius: 6,
+    marginBottom: 10,
+    padding: "5px 10px",
+    ...(is_primary_speaker
+      ? {
+          marginRight: 20,
+          textAlign: "left",
+          float: "left",
+          color: "white",
+          display: "inline-block",
+          backgroundColor: primary_speaker_color,
+        }
+      : {
+          textAlign: "right",
+          float: "right",
+          display: "inline-block",
+          marginLeft: 20,
+          backgroundColor: otherspeaker_color,
+        }),
+  };
+  if (image_src !== null) {
+    return (
+
+ {message}Image +
+
+ ); + } else { + return ( +
+
{message}
+
+ ); + } +} + +function MessageList({ task_data, index }) { + let messageList; + + if (task_data.pairing_dict === undefined) { + messageList = ( +
+

Loading chats

+
+ ); + } else { + let model = index === 0 ? "model_left" : "model_right"; + let messages = task_data.task_specs[model]["dialogue"]; + let primary_speaker = task_data.task_specs[model]["name"]; + + messageList = messages.map((m, idx) => ( +
+ +
+ )); + } + + return ( +
+ {messageList} +
+ ); +} + +class ChatPane extends React.Component { + constructor(props) { + super(props); + this.state = { chat_height: this.getChatHeight() }; + } + + getChatHeight() { + let entry_pane = $("div#right-bottom-pane").get(0); + let bottom_height = 90; + if (entry_pane !== undefined) { + bottom_height = entry_pane.scrollHeight; + } + return this.props.frame_height - bottom_height; + } + + handleResize() { + if (this.getChatHeight() != this.state.chat_height) { + this.setState({ chat_height: this.getChatHeight() }); + } + } + + render() { + // TODO move to CSS + let top_pane_style = { + width: "100%", + position: "relative", + }; + + let chat_style = { + width: "100%", + height: this.state.chat_height + "px", + paddingTop: "60px", + paddingLeft: "20px", + paddingRight: "20px", + paddingBottom: "20px", + overflowY: "scroll", + }; + + window.setTimeout(() => { + this.handleResize(); + }, 10); + + top_pane_style["height"] = this.state.chat_height + "px"; + + return ( +
+ + + +
+ +
+ + +
+ +
+ +
+
+
+ ); + } +} + +class EvalResponse extends React.Component { + constructor(props) { + super(props); + this.state = { + speakerChoice: "", + textReason: "", + taskData: [], + subtaskIndexSeen: 0, + }; + this.handleInputChange = this.handleInputChange.bind(this); + this.handleEnterKey = this.handleEnterKey.bind(this); + } + + componentDidUpdate(prevProps, prevState, snapshot) { + // Only change in the active status of this component should cause a + // focus event. Not having this would make the focus occur on every + // state update (including things like volume changes) + if (this.props.active && !prevProps.active) { + $("input#id_text_input").focus(); + } + this.props.onInputResize(); + } + + static getDerivedStateFromProps(nextProps, prevState) { + if ( + nextProps.current_subtask_index != null && + nextProps.current_subtask_index !== prevState.subtaskIndexSeen + ) { + return { + subtaskIndexSeen: nextProps.current_subtask_index, + textReason: "", + speakerChoice: "", + }; + } + return {}; + } + + checkValidData() { + let response_data = { + speakerChoice: this.state.speakerChoice, + textReason: this.state.textReason, + }; + if (this.state.speakerChoice !== "" && this.state.textReason.length > 4) { + this.props.onValidDataChange(true, response_data); + return; + } + this.props.onValidDataChange(false, response_data); + } + + handleInputChange(event) { + let target = event.target; + let value = target.value; + let name = target.name; + + this.setState({ [name]: value }, this.checkValidData); + } + + handleEnterKey(event) { + event.preventDefault(); + if (this.props.should_submit) { + this.props.allDoneCallback(); + } else if (this.props.subtask_done && this.props.show_next_task_button) { + this.props.nextButtonCallback(); + } + } + + render() { + console.log("Eval props", this.props); + if ( + this.props.task_data === undefined || + this.props.task_data.task_specs === undefined + ) { + return
; + } + let s1_choice = this.props.task_data.task_specs.s1_choice.split( + "" + ); + let s2_choice = this.props.task_data.task_specs.s2_choice.split( + "" + ); + let s1_name = this.props.task_data.task_specs.model_left.name; + let s2_name = this.props.task_data.task_specs.model_right.name; + let form_question = this.props.task_data.task_specs.question; + let text_question = + "Please provide a brief justification for your choice (a few words or a sentence)"; + let text_reason = ( +
+ {text_question} + + +
+ ); + let speaker1_div =
Speaker 1
; + let speaker2_div =
Speaker 2
; + let choice1 = ( +
+ {s1_choice[0]} + {speaker1_div} + {s1_choice[1]} +
+ ); + let choice2 = ( +
+ {s2_choice[0]} + {speaker2_div} + {s2_choice[1]} +
+ ); + return ( +
+
+
+ {form_question} + + + + {choice1} + + + + + {choice2} + + + + {text_reason} +
+
+
+ ); + } +} + +class TaskFeedbackPane extends React.Component { + constructor(props) { + super(props); + this.state = { + feedbackText: "", + }; + this.handleInputChange = this.handleInputChange.bind(this); + this.handleEnterKey = this.handleEnterKey.bind(this); + } + + getChatHeight() { + let entry_pane = $("div#right-bottom-pane").get(0); + let bottom_height = 90; + if (entry_pane !== undefined) { + bottom_height = entry_pane.scrollHeight; + } + return this.props.frame_height - bottom_height; + } + + handleResize() { + if (this.getChatHeight() != this.state.chat_height) { + this.setState({ chat_height: this.getChatHeight() }); + } + } + + componentDidUpdate(prevProps, prevState, snapshot) { + // Only change in the active status of this component should cause a + // focus event. Not having this would make the focus occur on every + // state update (including things like volume changes) + if (this.props.active && !prevProps.active) { + $("input#id_text_input").focus(); + } + this.props.onInputResize(); + } + + checkValidData() { + let response_data = { + feedbackText: this.state.feedbackText, + }; + this.props.onValidDataChange(true, response_data); + } + + handleInputChange(event) { + let target = event.target; + let value = target.value; + let name = target.name; + + this.setState({ [name]: value }, this.checkValidData); + } + + handleEnterKey(event) { + event.preventDefault(); + this.props.allDoneCallback(); + } + + render() { + if ( + this.props.task_data === undefined || + this.props.task_data.task_specs === undefined + ) { + return
; + } + let text_question = "If you have any feedback regarding this HIT, please leave it here.\nOtherwise, click the [Done with Task] button."; + let text_reason = ( +
+

(Optional)

+

{text_question}

+ +
+ ); + return ( +
+
+
+ {text_reason} + +
+
+
+ ); + } +} + +class ResponsePane extends React.Component { + render() { + return ( +
+ +
+ ); + } +} + +class PairwiseEvalPane extends React.Component { + handleResize() { + if (this.chat_pane !== undefined && this.chat_pane !== null) { + if (this.chat_pane.handleResize !== undefined) { + this.chat_pane.handleResize(); + } + } + } + + render() { + let right_pane = { + maxHeight: "60%", + display: "flex", + flexDirection: "column", + justifyContent: "spaceBetween", + width: "auto", + }; + if ( + this.props.current_subtask_index >= this.props.task_config.num_subtasks + ) { + return ( +
+ { + this.chat_pane = pane; + }} + onInputResize={() => this.handleResize()} + /> +
+ ); + } + return ( +
+ { + this.chat_pane = pane; + }} + /> + this.handleResize()} + /> +
+ ); + } +} + +class TaskDescription extends React.Component { + render() { + let header_text = "Which Conversational Partner is Better?"; + if (this.props.task_config === null) { + return
Loading
; + } + let task_config = this.props.task_config; + let num_subtasks = task_config.num_subtasks; + let question = task_config.question; + let additional_task_description = task_config.additional_task_description; + let content = ( +
+ In this task, you will read two conversations and judge  +
Speaker 1
on the left and  +
Speaker 2
on the right  based on + the quality of conversation only.{" "} + Don't base your judgement  on their hobbies, job, etc.  + Do your best to ignore the{" "} +
other speaker
.  You may need + to scroll down to see the full conversations.  +
+
+ You will judge
Speaker 1
and  +
Speaker 2
on this:  + {question} You should  also provide a very brief + justification. Failure to do so could result  in your hits being + rejected. +
+
+ + {" "} + You will do this for {num_subtasks} pairs of conversations.  Use + the [NEXT] button when you're done with each judgment. + +
+
NOTE: please be sure to only accept one of these tasks at a time.  Additional pages will show errors or fail to load and you will not be able to submit the HIT. 

Please accept the task if you're ready.

+
+ {additional_task_description} +
+ ); + if (!this.props.is_cover_page) { + if (this.props.task_data.task_specs === undefined) { + return
Loading
; + } + let num_subtasks = this.props.num_subtasks; + let cur_index = this.props.current_subtask_index + 1; + let question = this.props.task_data.task_specs.question; + content = ( +
+ + You are currently at comparison {cur_index} / {num_subtasks}{" "} + +
+
+ You will read two conversations and judge  +
Speaker 1
on the left and  +
Speaker 2
on the right  based + on the quality of conversation.{" "} + Don't base your judgement  on their hobbies, job, etc. +   Do your best to ignore the{" "} +
other speaker
.  You may + need to scroll down to see the full conversations.  +
+
+ You will judge
Speaker 1
and  +
Speaker 2
on this:  + {question} You should  also provide a very brief + justification. Failure to do so could result  in your hits being + rejected. +
+
+ + {" "} + You will do this for {num_subtasks} pairs of conversations.  + After completing each judgement, use the [NEXT] button. + +
+
+ {additional_task_description} +
+ ); + } + return ( +
+

{header_text}

+
+ {content} +
+ ); + } +} + +class LeftPane extends React.Component { + render() { + let frame_height = this.props.frame_height; + let frame_style = { + height: frame_height + "px", + backgroundColor: "#dff0d8", + padding: "30px", + overflow: "auto", + }; + let pane_size = this.props.is_cover_page ? "col-xs-12" : "col-xs-4"; + let has_context = this.props.task_data.has_context; + if (this.props.is_cover_page || !has_context) { + return ( +
+ + {this.props.children} +
+ ); + } + } +} + +class MultitaskFrontend extends React.Component { + constructor(props) { + super(props); + + // frame_height is in task_config.frame_height + // get_task_feedback is in task_config.get_task_feedback + // TODO move constants to props rather than state + this.state = { + task_done: false, + subtask_done: false, + task_data: this.props.task_data[0], + all_tasks_data: this.props.task_data, + num_subtasks: this.props.task_config.num_subtasks, + response_data: [], + current_subtask_index: 0, + should_submit: false, + }; + } + + computeShouldSubmit(new_index) { + // Return true if either all tasks are done this round and there is no feedback + // to do, or all tasks are done and we're on the feedback pane + return !( + (new_index < this.state.num_subtasks - 1 && + !this.props.task_config.get_task_feedback) || + (new_index == this.state.num_subtasks - 1 && + this.props.task_config.get_task_feedback) + ); + } + + onValidData(valid, response_data) { + console.log("onValidData", valid, response_data); + let all_response_data = this.state.response_data; + let show_next_task_button = false; + let task_done = true; + all_response_data[this.state.current_subtask_index] = response_data; + if (!this.state.should_submit) { + show_next_task_button = true; + task_done = false; + } + this.setState({ + show_next_task_button: show_next_task_button, + subtask_done: valid, + task_done: task_done, + response_data: all_response_data, + }); + } + + nextButtonCallback() { + let next_subtask_index = this.state.current_subtask_index + 1; + if (next_subtask_index == this.state.num_subtasks) { + this.setState({ + current_subtask_index: next_subtask_index, + task_data: Object.assign({}, this.state.task_data, {}), + subtask_done: true, + task_done: true, + should_submit: this.computeShouldSubmit(next_subtask_index), + }); + } else { + this.setState({ + current_subtask_index: next_subtask_index, + task_data: Object.assign( + {}, + this.state.task_data, + this.state.all_tasks_data[next_subtask_index] + ), + subtask_done: false, + should_submit: this.computeShouldSubmit(next_subtask_index), + }); + } + } + + render() { + let task_config = this.props.task_config; + let frame_height = task_config.frame_height || 650; + let passed_props = { + onValidDataChange: (valid, data) => this.onValidData(valid, data), + nextButtonCallback: () => this.nextButtonCallback(), + allDoneCallback: () => this.props.onSubmit(this.state.response_data), + show_next_task_button: this.state.show_next_task_button, + frame_height: frame_height, + task_config: task_config, + current_subtask_index: this.state.current_subtask_index, + num_subtasks: this.state.num_subtasks, + task_data: this.state.task_data, + task_done: this.state.task_done, + subtask_done: this.state.subtask_done, + should_submit: this.state.should_submit, + }; + return ( +
+
+ + +
+
+ ); + } +} + +export { TaskDescription, MultitaskFrontend as BaseFrontend }; diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/src/css/style.css b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/css/style.css new file mode 100644 index 00000000000..d7e5045a860 --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/css/style.css @@ -0,0 +1,9 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +.rt-th:focus { + outline: none !important; +} diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/src/main.js b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/main.js new file mode 100644 index 00000000000..3c2723c6a1d --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/main.js @@ -0,0 +1,7 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ +import './app.jsx'; +import './css/style.css'; diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/src/static/index.html b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/static/index.html new file mode 100644 index 00000000000..58a591e350a --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/src/static/index.html @@ -0,0 +1,27 @@ + + + + + + + + Conversation Evaluator + + + + + + + +
+ + + diff --git a/parlai/crowdsourcing/tasks/acute_eval/webapp/webpack.config.js b/parlai/crowdsourcing/tasks/acute_eval/webapp/webpack.config.js new file mode 100644 index 00000000000..ce57708f696 --- /dev/null +++ b/parlai/crowdsourcing/tasks/acute_eval/webapp/webpack.config.js @@ -0,0 +1,47 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +var path = require("path"); +var webpack = require("webpack"); + +module.exports = { + entry: "./src/main.js", + output: { + path: __dirname, + filename: "build/bundle.js", + }, + node: { + net: "empty", + dns: "empty", + }, + resolve: { + alias: { + react: path.resolve("./node_modules/react"), + }, + }, + module: { + rules: [ + { + test: /\.(js|jsx)$/, + loader: "babel-loader", + exclude: /node_modules/, + options: { presets: ["@babel/env"] }, + }, + { + test: /\.css$/, + loader: "style-loader!css-loader", + }, + { + test: /\.(svg|png|jpe?g|ttf)$/, + loader: "url-loader?limit=100000", + }, + { + test: /\.jpg$/, + loader: "file-loader", + }, + ], + }, +}; diff --git a/tests/test_code.py b/tests/test_code.py index 1b6516c3c47..a1db0592edc 100644 --- a/tests/test_code.py +++ b/tests/test_code.py @@ -19,13 +19,14 @@ class TestInit(unittest.TestCase): """ def test_init_everywhere(self): - for folder in testing_utils.git_ls_dirs('parlai'): - if 'mturk' in folder: + for folder_path in testing_utils.git_ls_dirs('parlai'): + excluded_folders = ['mturk', 'webapp'] + if any(folder_name in folder_path for folder_name in excluded_folders): continue self.assertIn( '__init__.py', - os.listdir(folder), - '{} does not contain __init__.py'.format(folder), + os.listdir(folder_path), + '{} does not contain __init__.py'.format(folder_path), )