diff --git a/parlai/crowdsourcing/projects/multisession_chat/human_eval/compile_results.py b/parlai/crowdsourcing/projects/multisession_chat/human_eval/compile_results.py index 5e5deba3e0f..ff7386e7def 100644 --- a/parlai/crowdsourcing/projects/multisession_chat/human_eval/compile_results.py +++ b/parlai/crowdsourcing/projects/multisession_chat/human_eval/compile_results.py @@ -29,6 +29,15 @@ class ModelChatResultsCompiler(BaseModelChatResultsCompiler): @classmethod def setup_args(cls): parser = super().setup_args() + parser.add_argument( + '--hit-block-list', + type=str, + default='', + help='Comma-separated list of all hits to block', + ) + parser.add_argument( + '--results-folders', type=str, help='Comma-separated list of result folders' + ) parser.add_argument( '--model-nickname', type=str, default='', help='name of the model' ) @@ -43,6 +52,10 @@ def setup_args(cls): def __init__(self, opt: Dict[str, Any]): AbstractTurnAnnotationResultsCompiler.__init__(self, opt) + if 'results_folders' in opt: + self.results_folders = opt['results_folders'].split(',') + else: + self.results_folders = None # Input args self.model_nickname = opt['model_nickname'] diff --git a/parlai/crowdsourcing/tasks/model_chat/README.md b/parlai/crowdsourcing/tasks/model_chat/README.md index 9ffa37bc0f3..0a2700925bc 100644 --- a/parlai/crowdsourcing/tasks/model_chat/README.md +++ b/parlai/crowdsourcing/tasks/model_chat/README.md @@ -59,4 +59,4 @@ Note that onboarding is not currently supported with human+model image chat: use ## Analysis -Run `analysis/compile_results.py` to compile and save statistics about collected human+model chats. The `ModelChatResultsCompiler` in that script uses dummy annotation buckets by default; set `--problem-buckets` in order to define your own. Set `--results-folders` to the value of `mephisto.blueprint.chat_data_folder` used when running HITs. +Run `analysis/compile_results.py` to compile and save statistics about collected human+model chats. The `ModelChatResultsCompiler` in that script uses dummy annotation buckets by default; set `--problem-buckets` in order to define your own. Set `--task-name` to specify the task whose results should be compiled. diff --git a/parlai/crowdsourcing/tasks/model_chat/analysis/compile_results.py b/parlai/crowdsourcing/tasks/model_chat/analysis/compile_results.py index 1d2f34c78bf..2f4c24678d3 100644 --- a/parlai/crowdsourcing/tasks/model_chat/analysis/compile_results.py +++ b/parlai/crowdsourcing/tasks/model_chat/analysis/compile_results.py @@ -4,9 +4,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree.
-import json import os -import re from datetime import datetime from typing import Any, Dict @@ -15,6 +13,10 @@ from parlai.crowdsourcing.utils.acceptability import AcceptabilityChecker from parlai.crowdsourcing.utils.analysis import AbstractTurnAnnotationResultsCompiler +from parlai.crowdsourcing.tasks.model_chat.model_chat_blueprint import BLUEPRINT_TYPE + +# importing BLUEPRINT_TYPE to force registration of the blueprint, not using this var itself +_ = BLUEPRINT_TYPE class ModelChatResultsCompiler(AbstractTurnAnnotationResultsCompiler): @@ -28,12 +30,6 @@ class ModelChatResultsCompiler(AbstractTurnAnnotationResultsCompiler): @classmethod def setup_args(cls): parser = super().setup_args() - parser.add_argument( - '--start-date', - type=str, - default='', - help='The earliest date to analyze results from', - ) parser.add_argument( '--max-convos-per-worker', type=int, @@ -46,12 +42,6 @@ def setup_args(cls): default=4, help='The minimum acceptable mean number of words per human utterance', ) - parser.add_argument( - '--hit-block-list', - type=str, - default='', - help='Comma-separated list of all hits to block', - ) parser.add_argument( '--worker-block-list', type=str, @@ -72,14 +62,9 @@ def __init__(self, opt: Dict[str, Any]): ) # Input args - assert len(self.results_folders) > 0 - for folder in self.results_folders: - assert os.path.isdir(folder), f'{folder} is not a valid folder!' os.makedirs(self.output_folder, exist_ok=True) - self.start_date = opt['start_date'] self.max_convos_per_worker = opt['max_convos_per_worker'] self.min_word_count = opt['min_word_count'] - self.hit_block_list = opt['hit_block_list'].split(',') self.worker_block_list = opt['worker_block_list'].split(',') # Setting up problem buckets @@ -100,33 +85,14 @@ def get_results_path_base(self) -> str: ) def compile_results(self) -> pd.DataFrame: - - read_folders = [] - date_strings = [] - for folder in self.results_folders: - # Load paths - date_strings = sorted( - [ - obj - for obj in os.listdir(folder) - if os.path.isdir(os.path.join(folder, obj)) - and re.fullmatch(r'\d\d\d\d_\d\d_\d\d', obj) - ] - ) - if self.start_date != '': - date_strings = [ - str_ for str_ in date_strings if str_ >= self.start_date - ] - folders = [os.path.join(folder, str_) for str_ in date_strings] - read_folders.extend(folders) - print(f'Date folders: ' + ', '.join(date_strings)) - + task_units_data = self.get_task_data() now = datetime.now() worker_results_file = os.path.join( self.output_folder, f'worker_results_{now.strftime("%Y%m%d_%H%M%S")}.csv' ) # Read in each file - num_incomplete_convos = 0 + num_convos_with_no_save_data = 0 + num_wrong_status_convos = 0 num_complete_convos = 0 complete_convos_per_model = {} bad_conversations = [] @@ -137,262 +103,255 @@ def compile_results(self) -> pd.DataFrame: conversation_idx = 0 conversation_dfs = [] - for read_folder in read_folders: - read_folder_name = os.path.split(read_folder)[-1] - for file_name in sorted(os.listdir(read_folder)): - if file_name in self.hit_block_list: - continue + for task_unit in task_units_data: + + worker_id = task_unit['worker_id'] + assignment_id = task_unit['assignment_id'] + + # Determine whether the task unit should be skipped: + # drop units with no saved data or a status other than completed/approved + if task_unit['data']['save_data'] is None: + num_convos_with_no_save_data += 1 + continue + elif task_unit['status'] not in ['completed', 'approved']: + num_wrong_status_convos += 1 + continue + else: + num_complete_convos += 1 + + # Extract the saved custom data + data = task_unit['data']['save_data']['custom_data'] + + #
Only include the first max_convos_per_worker conversations from a + # worker to avoid biasing + worker_id = task_unit['worker_id'] + assignment_id = task_unit['assignment_id'] + if worker_id in worker_conversation_counts: + conversations_so_far = worker_conversation_counts[worker_id] + else: + conversations_so_far = 0 + worker_conversation_counts[worker_id] = conversations_so_far + 1 + if ( + self.max_convos_per_worker != -1 + and conversations_so_far >= self.max_convos_per_worker + ): + print( + f'Had {conversations_so_far} conversation(s) already from this worker {worker_id}. Skipping {assignment_id}.' + ) + continue - if 'incomplete' in file_name: - num_incomplete_convos += 1 - continue - else: - num_complete_convos += 1 - - # Read in file - with open(os.path.join(read_folder, file_name), 'rb') as f: - data = json.load(f) - - # Only include the first max_convos_per_worker conversations from a - # worker to avoid biasing - worker_id = data['workers'][0] - assignment_id = data['assignment_ids'][0] - if worker_id in worker_conversation_counts: - conversations_so_far = worker_conversation_counts[worker_id] - else: - conversations_so_far = 0 - worker_conversation_counts[worker_id] = conversations_so_far + 1 - if ( - self.max_convos_per_worker != -1 - and conversations_so_far >= self.max_convos_per_worker - ): - print( - f'Had {conversations_so_far} conversation(s) already from this worker {worker_id}. Skipping {assignment_id}.' - ) - continue + # Check if need to block the turker + word_counts = [ + len(d['text'].split(' ')) for d in data['dialog'] if d['agent_idx'] == 0 + ] + utterances = [d['text'] for d in data['dialog'] if d['agent_idx'] == 0] + if np.average(word_counts) < self.min_word_count: + bad_conversations.append(data) + print( + f'Bad complete conversation, words from human: {utterances}. Skipping.' + ) + continue - # Check if need to block the turker - word_counts = [ - len(d['text'].split(' ')) - for d in data['dialog'] - if d['agent_idx'] == 0 + if self.use_problem_buckets: + if not all( + bucket in data['dialog'][1]['problem_data'] + for bucket in self.problem_buckets + ): + raise ValueError('Bucket(s) are missing from the problem data!') + + model_nickname = data['task_description']['model_nickname'] + if model_nickname not in stat_counts: + stat_counts[model_nickname] = {} + if model_nickname in complete_convos_per_model: + complete_convos_per_model[model_nickname] += 1 + else: + complete_convos_per_model[model_nickname] = 1 + + # Extract non-message info + info_dict = { + 'worker': worker_id, + 'model_nickname': model_nickname, + 'bad_workers': ','.join(data['bad_workers']), + 'hit_id': data['hit_ids'][0], + 'assignment_id': assignment_id, + 'context_dataset': data['context_dataset'], + 'additional_context': data['additional_context'], + } + + # Check that the conversation consists of pairs of comments between + # agents 0 and 1, with 0 speaking first + assert all( + [ + utterance_data['agent_idx'] == utterance_idx % 2 + for utterance_idx, utterance_data in enumerate(data['dialog']) ] - utterances = [d['text'] for d in data['dialog'] if d['agent_idx'] == 0] - if np.average(word_counts) < self.min_word_count: - bad_conversations.append(data) - print( - f'Bad complete conversation, words from human: {utterances}. Skipping.' 
- ) - continue + ) - if self.use_problem_buckets: - if not all( - bucket in data['dialog'][1]['problem_data'] - for bucket in self.problem_buckets - ): - raise ValueError('Bucket(s) are missing from the problem data!') - - model_nickname = data['task_description']['model_nickname'] - if model_nickname not in stat_counts: - stat_counts[model_nickname] = {} - if model_nickname in complete_convos_per_model: - complete_convos_per_model[model_nickname] += 1 - else: - complete_convos_per_model[model_nickname] = 1 + # Determine whether the HIT contains unacceptable messages. + # (We do this for every HIT, even if acceptability violation info + # was already saved, because the violation criteria may have + # changed since the HIT was collected.) + messages_0 = [utt for utt in data['dialog'] if utt['agent_idx'] == 0] + messages_1 = [utt for utt in data['dialog'] if utt['agent_idx'] == 1] + assert len(messages_0) + len(messages_1) == len(data['dialog']) + + # Check the human utterances for safety + utterances_0 = [m['text'] for m in messages_0] + info_dict[ + 'acceptability_violations_0' + ] = self.acceptability_checker.check_messages( + messages=utterances_0, + is_worker_0=True, + violation_types=self.acceptability_checker.ALL_VIOLATION_TYPES, + ) - # Extract non-message info - info_dict = { - 'read_folder_name': read_folder_name, - 'file_name': file_name, - 'worker': worker_id, + # Compile personas and previous utterances + df = pd.DataFrame( + [], + columns=[ + 'worker_id', + 'hit_id', + 'model_nickname', + 'conversation_idx', + 'turn_idx', + 'agent_idx', + 'text', + ] + + self.problem_buckets, + ) + text_parts = [] + if data['personas'] is not None and len(data['personas']) > 0: + text_parts += [ + 'your persona: ' + data['personas'][1][0], + 'your persona: ' + data['personas'][1][1], + ] + if ( + data['additional_context'] is not None + and len(data['additional_context']) > 0 + ): + text_parts.append(data['additional_context']) + df = df.append( + { + 'worker_id': info_dict['worker'], + 'hit_id': info_dict['hit_id'], 'model_nickname': model_nickname, - 'bad_workers': ','.join(data['bad_workers']), - 'hit_id': data['hit_ids'][0], - 'assignment_id': assignment_id, - 'is_incomplete': 'incomplete' in file_name, - 'context_dataset': data['context_dataset'], - 'additional_context': data['additional_context'], - } + 'conversation_idx': conversation_idx, + 'turn_idx': -1, + 'agent_idx': 1, + 'text': '\n'.join(text_parts), + **{bucket: '' for bucket in self.problem_buckets}, + }, + ignore_index=True, + ) - # Check that the conversation consists of pairs of comments between - # agents 0 and 1, with 0 speaking first - assert all( - [ - utterance_data['agent_idx'] == utterance_idx % 2 - for utterance_idx, utterance_data in enumerate(data['dialog']) - ] + total_utterances += len([d for d in data["dialog"] if d["agent_idx"] == 1]) + if len(data['dialog']) > 20: + print( + f'Got long dialogue of {len(data["dialog"])} utterances, hit id: {info_dict["hit_id"]}, model_nickname: {model_nickname}.' ) - # Determine whether the HIT contains unacceptable messages. - # (We do this for every HIT, even if acceptability violation info - # was already saved, because the violation criteria may have - # changed since the HIT was collected.) 
- messages_0 = [utt for utt in data['dialog'] if utt['agent_idx'] == 0] - messages_1 = [utt for utt in data['dialog'] if utt['agent_idx'] == 1] - assert len(messages_0) + len(messages_1) == len(data['dialog']) - - # Check the human utterances for safety - utterances_0 = [m['text'] for m in messages_0] - info_dict[ - 'acceptability_violations_0' - ] = self.acceptability_checker.check_messages( - messages=utterances_0, - is_worker_0=True, - violation_types=self.acceptability_checker.ALL_VIOLATION_TYPES, - ) + if self.use_problem_buckets: + dialog_has_problems = False + for utterance_idx, utt in enumerate(data['dialog']): - # Compile personas and previous utterances - df = pd.DataFrame( - [], - columns=[ - 'folder', - 'worker_id', - 'hit_id', - 'model_nickname', - 'conversation_idx', - 'turn_idx', - 'agent_idx', - 'text', - ] - + self.problem_buckets, - ) - text_parts = [] - if data['personas'] is not None and len(data['personas']) > 0: - text_parts += [ - 'your persona: ' + data['personas'][1][0], - 'your persona: ' + data['personas'][1][1], - ] - if ( - data['additional_context'] is not None - and len(data['additional_context']) > 0 - ): - text_parts.append(data['additional_context']) - df = df.append( - { - 'folder': info_dict['read_folder_name'], - 'worker_id': info_dict['worker'], - 'hit_id': info_dict['hit_id'], - 'model_nickname': model_nickname, - 'conversation_idx': conversation_idx, - 'turn_idx': -1, - 'agent_idx': 1, - 'text': '\n'.join(text_parts), - **{bucket: '' for bucket in self.problem_buckets}, - }, - ignore_index=True, - ) + d = { + 'worker_id': info_dict['worker'], + 'hit_id': info_dict['hit_id'], + 'model_nickname': model_nickname, + 'conversation_idx': conversation_idx, + 'turn_idx': utterance_idx, + 'agent_idx': utt['agent_idx'], + 'text': utt['text'], + **{bucket: '' for bucket in self.problem_buckets}, + } - total_utterances += len( - [d for d in data["dialog"] if d["agent_idx"] == 1] - ) - if len(data['dialog']) > 20: - print( - f'Got long dialogue of {len(data["dialog"])} utterances, hit id: {info_dict["hit_id"]}, model_nickname: {model_nickname}.' - ) + if utt['agent_idx'] == 1: - if self.use_problem_buckets: - dialog_has_problems = False - for utterance_idx, utt in enumerate(data['dialog']): - - d = { - 'folder': info_dict['read_folder_name'], - 'worker_id': info_dict['worker'], - 'hit_id': info_dict['hit_id'], - 'model_nickname': model_nickname, - 'conversation_idx': conversation_idx, - 'turn_idx': utterance_idx, - 'agent_idx': utt['agent_idx'], - 'text': utt['text'], - **{bucket: '' for bucket in self.problem_buckets}, - } + d['final_rating'] = utt.get('final_rating') - if utt['agent_idx'] == 1: - - d['final_rating'] = utt.get('final_rating') - - if self.use_problem_buckets: - if 'problem_data' not in utt: - for bucket in self.problem_buckets: - d[bucket] = 'MALFORMED' - print( - f'Warning got MALFORMED utterance problem data inside complete convo: {utt}. Skipping.' 
- ) - continue - else: - for bucket in self.regular_buckets + ['none_all_good']: - d[bucket] = utt['problem_data'][bucket] - for k in self.regular_buckets + ['none_all_good']: - if k not in stat_counts[model_nickname]: - stat_counts[model_nickname][k] = 0 - stat_counts[model_nickname][k] += d[k] - if k != 'none_all_good' and d[k]: - dialog_has_problems = True - - if 'total' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['total'] = 0 - if d['agent_idx'] == 1: - stat_counts[model_nickname]['total'] += 1 - if d['final_rating'] is not None: - # Only one the last utterance (agent idx == 1) - if 'count_ratings' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['count_ratings'] = 0 - stat_counts[model_nickname]['count_ratings'] += 1 - if 'ratings' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['ratings'] = [] - stat_counts[model_nickname]['ratings'].append( - int(d['final_rating']) + if self.use_problem_buckets: + if 'problem_data' not in utt: + for bucket in self.problem_buckets: + d[bucket] = 'MALFORMED' + print( + f'Warning got MALFORMED utterance problem data inside complete convo: {utt}. Skipping.' ) + continue + else: + for bucket in self.regular_buckets + ['none_all_good']: + d[bucket] = utt['problem_data'][bucket] + for k in self.regular_buckets + ['none_all_good']: + if k not in stat_counts[model_nickname]: + stat_counts[model_nickname][k] = 0 + stat_counts[model_nickname][k] += d[k] + if k != 'none_all_good' and d[k]: + dialog_has_problems = True + + if 'total' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['total'] = 0 + if d['agent_idx'] == 1: + stat_counts[model_nickname]['total'] += 1 + if d['final_rating'] is not None: + # Only one the last utterance (agent idx == 1) + if 'count_ratings' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['count_ratings'] = 0 + stat_counts[model_nickname]['count_ratings'] += 1 + if 'ratings' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['ratings'] = [] + stat_counts[model_nickname]['ratings'].append( + int(d['final_rating']) + ) - else: - - # Counting some aspects of the human's utterances - if 'human_utterance_count' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['human_utterance_count'] = 0 - stat_counts[model_nickname]['human_utterance_count'] += 1 + else: - if 'human_word_count' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['human_word_count'] = 0 - stat_counts[model_nickname]['human_word_count'] += len( - d['text'].strip().split(' ') - ) + # Counting some aspects of the human's utterances + if 'human_utterance_count' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['human_utterance_count'] = 0 + stat_counts[model_nickname]['human_utterance_count'] += 1 - if 'human_question_count' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['human_question_count'] = 0 - stat_counts[model_nickname]['human_question_count'] += d[ - 'text' - ].count('?') + if 'human_word_count' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['human_word_count'] = 0 + stat_counts[model_nickname]['human_word_count'] += len( + d['text'].strip().split(' ') + ) - d = self._add_additional_per_turn_stats(d=d, utt=utt) + if 'human_question_count' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['human_question_count'] = 0 + stat_counts[model_nickname]['human_question_count'] += d[ + 'text' + ].count('?') - df = df.append(d, ignore_index=True) + d = 
self._add_additional_per_turn_stats(d=d, utt=utt) - if info_dict['worker'] not in worker_stats: - worker_stats[info_dict['worker']] = {'conversations': 0} - if self.use_problem_buckets: - worker_stats[info_dict['worker']]['problems_found'] = 0 - worker_stats[info_dict['worker']]['conversations'] += 1 + df = df.append(d, ignore_index=True) + if info_dict['worker'] not in worker_stats: + worker_stats[info_dict['worker']] = {'conversations': 0} if self.use_problem_buckets: - # Count the number of problems the worker got - is_problem = ~df['none_all_good'].replace('', True) - # Only want to count bot utterances but human ones, while included, - # won't be False - count = is_problem.sum() - worker_stats[info_dict['worker']]['problems_found'] += count - - # Logic for calculating percent of conversations that are clean - if 'count_convos' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['count_convos'] = 0 - stat_counts[model_nickname]['count_convos'] += 1 - - if self.use_problem_buckets and not dialog_has_problems: - if 'convo_clean' not in stat_counts[model_nickname]: - stat_counts[model_nickname]['convo_clean'] = 0 - stat_counts[model_nickname]['convo_clean'] += 1 - - # Adding the full conversation to the list of conversations - conversation_dfs.append(df) - conversation_idx += 1 + worker_stats[info_dict['worker']]['problems_found'] = 0 + worker_stats[info_dict['worker']]['conversations'] += 1 + + if self.use_problem_buckets: + # Count the number of problems the worker got + is_problem = ~df['none_all_good'].replace('', True) + # Only want to count bot utterances but human ones, while included, + # won't be False + count = is_problem.sum() + worker_stats[info_dict['worker']]['problems_found'] += count + + # Logic for calculating percent of conversations that are clean + if 'count_convos' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['count_convos'] = 0 + stat_counts[model_nickname]['count_convos'] += 1 + + if self.use_problem_buckets and not dialog_has_problems: + if 'convo_clean' not in stat_counts[model_nickname]: + stat_counts[model_nickname]['convo_clean'] = 0 + stat_counts[model_nickname]['convo_clean'] += 1 + + # Adding the full conversation to the list of conversations + conversation_dfs.append(df) + conversation_idx += 1 for m, conversation_count in complete_convos_per_model.items(): print(f'Got {conversation_count} complete conversation(s) for model: {m}') @@ -401,7 +360,10 @@ def compile_results(self) -> pd.DataFrame: print(f'{len(bad_conversations):d} bad conversation(s).') num_approved_convos = num_complete_convos - len(bad_conversations) print(f'{num_approved_convos:d} approved conversation(s).') - print(f'({num_incomplete_convos:d} incomplete conversation(s) collected.)') + print(f'({num_wrong_status_convos:d} wrong status conversation(s) collected.)') + print( + f'({num_convos_with_no_save_data:d} conversation(s) collected with no saved data.)' + ) for model_nickname, model_stats_dict in stat_counts.items(): print(f'---{model_nickname}---') for p, v in model_stats_dict.items(): diff --git a/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py b/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py index 3225a8cb633..4d833f486c7 100644 --- a/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py +++ b/parlai/crowdsourcing/tasks/turn_annotations_static/analysis/compile_results.py @@ -36,6 +36,9 @@ class 
TurnAnnotationsStaticResultsCompiler(AbstractTurnAnnotationResultsCompiler @classmethod def setup_args(cls): parser = super().setup_args() + parser.add_argument( + '--results-folders', type=str, help='Comma-separated list of result folders' + ) parser.add_argument( '--onboarding-in-flight-data-file', type=str, @@ -51,6 +54,12 @@ def setup_args(cls): def __init__(self, opt: Dict[str, Any]): super().__init__(opt) + + if 'results_folders' in opt: + self.results_folders = opt['results_folders'].split(',') + else: + self.results_folders = None + # Validate problem buckets if self.use_problem_buckets and 'none_all_good' not in self.problem_buckets: # The code relies on a catchall "none" category if the user selects no other diff --git a/parlai/crowdsourcing/utils/analysis.py b/parlai/crowdsourcing/utils/analysis.py index 2642e4887ff..dacb6a4c327 100644 --- a/parlai/crowdsourcing/utils/analysis.py +++ b/parlai/crowdsourcing/utils/analysis.py @@ -216,9 +216,6 @@ class AbstractTurnAnnotationResultsCompiler(AbstractResultsCompiler): @classmethod def setup_args(cls): parser = super().setup_args() - parser.add_argument( - '--results-folders', type=str, help='Comma-separated list of result folders' - ) parser.add_argument( '--problem-buckets', type=str, @@ -232,10 +229,6 @@ def __init__(self, opt: Opt): super().__init__(opt) # Handle inputs - if 'results_folders' in opt: - self.results_folders = opt['results_folders'].split(',') - else: - self.results_folders = None if opt['problem_buckets'].lower() not in ['', 'none']: self.use_problem_buckets = True self.problem_buckets = opt['problem_buckets'].split(',') diff --git a/requirements.txt b/requirements.txt index b95868c4e43..5b65f5119f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,7 +41,7 @@ sphinx_rtd_theme==0.4.3 sphinx-autodoc-typehints~=1.10.3 Sphinx~=2.2.0 subword-nmt==0.3.7 -tensorboard==2.3.0 +tensorboard==2.9.0 tensorboardX==2.1 tokenizers>=0.8.0 tomli<2.0.0 diff --git a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis.py b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis.py index fa25c9c2ba3..d2d2a1c3a85 100644 --- a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis.py +++ b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis.py @@ -8,7 +8,10 @@ """ import glob +import json import os +import re +from typing import Any, Dict, List import pytest from pytest_regressions.file_regression import FileRegressionFixture @@ -23,6 +26,38 @@ ) from parlai.crowdsourcing.utils.tests import check_stdout + class TestModelChatResultsCompiler(ModelChatResultsCompiler): + def get_task_data(self) -> List[Dict[str, Any]]: + fake_jsons = [] + # Load paths + date_strings = sorted( + [ + obj + for obj in os.listdir(self.results_folder) + if os.path.isdir(os.path.join(self.results_folder, obj)) + and re.fullmatch(r'\d\d\d\d_\d\d_\d\d', obj) + ] + ) + folders = [os.path.join(self.results_folder, str_) for str_ in date_strings] + + for folder in folders: + for file_name in sorted(os.listdir(folder)): + # Read in file + with open(os.path.join(folder, file_name), 'rb') as f: + data = json.load(f) + worker_id = data['workers'][0] + assignment_id = data['assignment_ids'][0] + fake_jsons.append( + { + 'data': {'save_data': {'custom_data': data}}, + 'worker_id': worker_id, + 'assignment_id': assignment_id, + 'status': 'completed', + } + ) + + return fake_jsons + class TestCompileResults: """ Test the analysis code for the model chat task. 
@@ -62,14 +97,12 @@ def setup_teardown(self): # Run analysis with testing_utils.capture_output() as output: - arg_string = f"""\ ---results-folders {analysis_samples_folder} ---output-folder {tmpdir} \ -{flag_string} -""" - parser_ = ModelChatResultsCompiler.setup_args() + arg_string = f"""--output-folder {tmpdir} {flag_string}""" + parser_ = TestModelChatResultsCompiler.setup_args() args_ = parser_.parse_args(arg_string.split()) - ModelChatResultsCompiler(vars(args_)).compile_and_save_results() + compiler = TestModelChatResultsCompiler(vars(args_)) + compiler.results_folder = analysis_samples_folder + compiler.compile_and_save_results() stdout = output.getvalue() # Define output structure diff --git a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__results.txt b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__results.txt index cd09249931f..5480df28b56 100644 --- a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__results.txt +++ b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__results.txt @@ -1,40 +1,40 @@ -folder,worker_id,hit_id,model_nickname,conversation_idx,turn_idx,agent_idx,text,final_rating -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,-1,1,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,0,0,Hi!, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,1,1,Utterance placeholder line 1 0, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,2,0,Utterance placeholder line 0 1, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,3,1,Utterance placeholder line 1 1, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,4,0,Utterance placeholder line 0 2, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,5,1,Utterance placeholder line 1 2, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,6,0,Utterance placeholder line 0 3, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,7,1,Utterance placeholder line 1 3, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,8,0,Utterance placeholder line 0 4, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,9,1,Utterance placeholder line 1 4, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,10,0,Utterance placeholder line 0 5, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,11,1,Utterance placeholder line 1 5,1 -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,-1,1,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,0,0,Hi!, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,1,1,Utterance placeholder line 1 0, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,2,0,Utterance placeholder line 0 1, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,3,1,Utterance placeholder line 1 1, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,4,0,Utterance placeholder line 0 2, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,5,1,Utterance placeholder line 1 2, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,6,0,Utterance placeholder line 0 3, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,7,1,Utterance placeholder line 1 3, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,8,0,Utterance placeholder line 0 4, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,9,1,Utterance placeholder line 1 4, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,10,0,Utterance placeholder line 0 5, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,11,1,Utterance placeholder line 1 5,5 -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,-1,1,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,0,0,Hi!, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,1,1,Utterance placeholder line 1 0, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,2,0,Utterance placeholder line 0 1, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,3,1,Utterance placeholder line 1 1, 
-2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,4,0,Utterance placeholder line 0 2, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,5,1,Utterance placeholder line 1 2, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,6,0,Utterance placeholder line 0 3, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,7,1,Utterance placeholder line 1 3, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,8,0,Utterance placeholder line 0 4, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,9,1,Utterance placeholder line 1 4, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,10,0,Utterance placeholder line 0 5, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,11,1,Utterance placeholder line 1 5,1 +worker_id,hit_id,model_nickname,conversation_idx,turn_idx,agent_idx,text,final_rating +WORKER_1,HIT_ID_1,blender_90M,0,-1,1,, +WORKER_1,HIT_ID_1,blender_90M,0,0,0,Hi!, +WORKER_1,HIT_ID_1,blender_90M,0,1,1,Utterance placeholder line 1 0, +WORKER_1,HIT_ID_1,blender_90M,0,2,0,Utterance placeholder line 0 1, +WORKER_1,HIT_ID_1,blender_90M,0,3,1,Utterance placeholder line 1 1, +WORKER_1,HIT_ID_1,blender_90M,0,4,0,Utterance placeholder line 0 2, +WORKER_1,HIT_ID_1,blender_90M,0,5,1,Utterance placeholder line 1 2, +WORKER_1,HIT_ID_1,blender_90M,0,6,0,Utterance placeholder line 0 3, +WORKER_1,HIT_ID_1,blender_90M,0,7,1,Utterance placeholder line 1 3, +WORKER_1,HIT_ID_1,blender_90M,0,8,0,Utterance placeholder line 0 4, +WORKER_1,HIT_ID_1,blender_90M,0,9,1,Utterance placeholder line 1 4, +WORKER_1,HIT_ID_1,blender_90M,0,10,0,Utterance placeholder line 0 5, +WORKER_1,HIT_ID_1,blender_90M,0,11,1,Utterance placeholder line 1 5,1 +WORKER_2,HIT_ID_2,blender_90M,1,-1,1,, +WORKER_2,HIT_ID_2,blender_90M,1,0,0,Hi!, +WORKER_2,HIT_ID_2,blender_90M,1,1,1,Utterance placeholder line 1 0, +WORKER_2,HIT_ID_2,blender_90M,1,2,0,Utterance placeholder line 0 1, +WORKER_2,HIT_ID_2,blender_90M,1,3,1,Utterance placeholder line 1 1, +WORKER_2,HIT_ID_2,blender_90M,1,4,0,Utterance placeholder line 0 2, +WORKER_2,HIT_ID_2,blender_90M,1,5,1,Utterance placeholder line 1 2, +WORKER_2,HIT_ID_2,blender_90M,1,6,0,Utterance placeholder line 0 3, +WORKER_2,HIT_ID_2,blender_90M,1,7,1,Utterance placeholder line 1 3, +WORKER_2,HIT_ID_2,blender_90M,1,8,0,Utterance placeholder line 0 4, +WORKER_2,HIT_ID_2,blender_90M,1,9,1,Utterance placeholder line 1 4, +WORKER_2,HIT_ID_2,blender_90M,1,10,0,Utterance placeholder line 0 5, +WORKER_2,HIT_ID_2,blender_90M,1,11,1,Utterance placeholder line 1 5,5 +WORKER_3,HIT_ID_3,blender_90M,2,-1,1,, +WORKER_3,HIT_ID_3,blender_90M,2,0,0,Hi!, +WORKER_3,HIT_ID_3,blender_90M,2,1,1,Utterance placeholder line 1 0, +WORKER_3,HIT_ID_3,blender_90M,2,2,0,Utterance placeholder line 0 1, +WORKER_3,HIT_ID_3,blender_90M,2,3,1,Utterance placeholder line 1 1, +WORKER_3,HIT_ID_3,blender_90M,2,4,0,Utterance placeholder line 0 2, +WORKER_3,HIT_ID_3,blender_90M,2,5,1,Utterance placeholder line 1 2, +WORKER_3,HIT_ID_3,blender_90M,2,6,0,Utterance placeholder line 0 3, +WORKER_3,HIT_ID_3,blender_90M,2,7,1,Utterance placeholder line 1 3, +WORKER_3,HIT_ID_3,blender_90M,2,8,0,Utterance placeholder line 0 4, +WORKER_3,HIT_ID_3,blender_90M,2,9,1,Utterance placeholder line 1 4, +WORKER_3,HIT_ID_3,blender_90M,2,10,0,Utterance placeholder line 0 5, +WORKER_3,HIT_ID_3,blender_90M,2,11,1,Utterance placeholder line 1 5,1 \ No newline at end of file diff --git a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__test_stdout.txt b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__test_stdout.txt index d7bb1301745..3d2083a98f6 100644 --- 
a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__test_stdout.txt +++ b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/basic__test_stdout.txt @@ -1,9 +1,9 @@ -Date folders: 2020_12_29 Got 3 complete conversation(s) for model: blender_90M 3 complete conversation(s) collected. 0 bad conversation(s). 3 approved conversation(s). -(0 incomplete conversation(s) collected.) +(0 wrong status conversation(s) collected.) +(0 conversation(s) collected with no saved data.) ---blender_90M--- human_utterance_count: 18 human_word_count: 78 (4.33) @@ -22,4 +22,4 @@ WORKER_3 1 WORKER_2 1 2 WORKER_3 1 -Worker conversation counts: {'WORKER_1': 1, 'WORKER_2': 1, 'WORKER_3': 1} +Worker conversation counts: {'WORKER_1': 1, 'WORKER_2': 1, 'WORKER_3': 1} \ No newline at end of file diff --git a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__results.txt b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__results.txt index 10eefbd451a..f80a1a52ea6 100644 --- a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__results.txt +++ b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__results.txt @@ -1,51 +1,51 @@ -folder,worker_id,hit_id,model_nickname,conversation_idx,turn_idx,agent_idx,text,bucket_0,bucket_1,bucket_2,bucket_3,bucket_4,none_all_good,final_rating -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,-1,1,"your persona: i also like a glass of wine in the evenings. +worker_id,hit_id,model_nickname,conversation_idx,turn_idx,agent_idx,text,bucket_0,bucket_1,bucket_2,bucket_3,bucket_4,none_all_good,final_rating +WORKER_1,HIT_ID_1,blender_90M,0,-1,1,"your persona: i also like a glass of wine in the evenings. your persona: i drive a mini van.",,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,0,0,Hi!,,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,1,1,Utterance placeholder line 1 0,False,False,False,False,True,False, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,2,0,Utterance placeholder line 0 1,,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,3,1,Utterance placeholder line 1 1,False,False,False,False,False,True, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,4,0,Utterance placeholder line 0 2,,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,5,1,Utterance placeholder line 1 2,False,False,False,False,False,True, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,6,0,Utterance placeholder line 0 3,,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,7,1,Utterance placeholder line 1 3,False,False,False,False,False,True, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,8,0,Utterance placeholder line 0 4,,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,9,1,Utterance placeholder line 1 4,True,False,False,False,False,False, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,10,0,Utterance placeholder line 0 5,,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,11,1,Utterance placeholder line 1 5,False,False,False,False,False,True, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,12,0,Utterance placeholder line 0 6,,,,,,, -2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,13,1,Utterance placeholder line 1 6,False,False,False,False,False,True,1 -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,-1,1,"your persona: i walk dogs for a living. 
+WORKER_1,HIT_ID_1,blender_90M,0,0,0,Hi!,,,,,,, +WORKER_1,HIT_ID_1,blender_90M,0,1,1,Utterance placeholder line 1 0,False,False,False,False,True,False, +WORKER_1,HIT_ID_1,blender_90M,0,2,0,Utterance placeholder line 0 1,,,,,,, +WORKER_1,HIT_ID_1,blender_90M,0,3,1,Utterance placeholder line 1 1,False,False,False,False,False,True, +WORKER_1,HIT_ID_1,blender_90M,0,4,0,Utterance placeholder line 0 2,,,,,,, +WORKER_1,HIT_ID_1,blender_90M,0,5,1,Utterance placeholder line 1 2,False,False,False,False,False,True, +WORKER_1,HIT_ID_1,blender_90M,0,6,0,Utterance placeholder line 0 3,,,,,,, +WORKER_1,HIT_ID_1,blender_90M,0,7,1,Utterance placeholder line 1 3,False,False,False,False,False,True, +WORKER_1,HIT_ID_1,blender_90M,0,8,0,Utterance placeholder line 0 4,,,,,,, +WORKER_1,HIT_ID_1,blender_90M,0,9,1,Utterance placeholder line 1 4,True,False,False,False,False,False, +WORKER_1,HIT_ID_1,blender_90M,0,10,0,Utterance placeholder line 0 5,,,,,,, +WORKER_1,HIT_ID_1,blender_90M,0,11,1,Utterance placeholder line 1 5,False,False,False,False,False,True, +WORKER_1,HIT_ID_1,blender_90M,0,12,0,Utterance placeholder line 0 6,,,,,,, +WORKER_1,HIT_ID_1,blender_90M,0,13,1,Utterance placeholder line 1 6,False,False,False,False,False,True,1 +WORKER_2,HIT_ID_2,blender_90M,1,-1,1,"your persona: i walk dogs for a living. your persona: i enjoy reading journals and guides related to psychology. I ate at an all you can eat chinese buffet. Literally felt so close to exploding. I ended up going home and falling into a deep slumber.",,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,0,0,Hi!,,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,1,1,Utterance placeholder line 1 0,False,True,False,False,False,False, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,2,0,Utterance placeholder line 0 1,,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,3,1,Utterance placeholder line 1 1,False,False,False,False,False,True, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,4,0,Utterance placeholder line 0 2,,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,5,1,Utterance placeholder line 1 2,False,False,False,False,False,True, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,6,0,Utterance placeholder line 0 3,,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,7,1,Utterance placeholder line 1 3,False,False,False,False,False,True, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,8,0,Utterance placeholder line 0 4,,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,9,1,Utterance placeholder line 1 4,False,False,False,False,False,True, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,10,0,Utterance placeholder line 0 5,,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,11,1,Utterance placeholder line 1 5,False,False,False,False,False,True, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,12,0,Utterance placeholder line 0 6,,,,,,, -2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,13,1,Utterance placeholder line 1 6,False,False,False,False,False,True,5 -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,-1,1,"your persona: i like to make time stop. 
+WORKER_2,HIT_ID_2,blender_90M,1,0,0,Hi!,,,,,,, +WORKER_2,HIT_ID_2,blender_90M,1,1,1,Utterance placeholder line 1 0,False,True,False,False,False,False, +WORKER_2,HIT_ID_2,blender_90M,1,2,0,Utterance placeholder line 0 1,,,,,,, +WORKER_2,HIT_ID_2,blender_90M,1,3,1,Utterance placeholder line 1 1,False,False,False,False,False,True, +WORKER_2,HIT_ID_2,blender_90M,1,4,0,Utterance placeholder line 0 2,,,,,,, +WORKER_2,HIT_ID_2,blender_90M,1,5,1,Utterance placeholder line 1 2,False,False,False,False,False,True, +WORKER_2,HIT_ID_2,blender_90M,1,6,0,Utterance placeholder line 0 3,,,,,,, +WORKER_2,HIT_ID_2,blender_90M,1,7,1,Utterance placeholder line 1 3,False,False,False,False,False,True, +WORKER_2,HIT_ID_2,blender_90M,1,8,0,Utterance placeholder line 0 4,,,,,,, +WORKER_2,HIT_ID_2,blender_90M,1,9,1,Utterance placeholder line 1 4,False,False,False,False,False,True, +WORKER_2,HIT_ID_2,blender_90M,1,10,0,Utterance placeholder line 0 5,,,,,,, +WORKER_2,HIT_ID_2,blender_90M,1,11,1,Utterance placeholder line 1 5,False,False,False,False,False,True, +WORKER_2,HIT_ID_2,blender_90M,1,12,0,Utterance placeholder line 0 6,,,,,,, +WORKER_2,HIT_ID_2,blender_90M,1,13,1,Utterance placeholder line 1 6,False,False,False,False,False,True,5 +WORKER_3,HIT_ID_3,blender_90M,2,-1,1,"your persona: i like to make time stop. your persona: i live in the cloud. Blue",,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,0,0,Hi!,,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,1,1,Utterance placeholder line 1 0,False,False,False,False,False,True, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,2,0,Utterance placeholder line 0 1,,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,3,1,Utterance placeholder line 1 1,False,False,False,False,False,True, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,4,0,Utterance placeholder line 0 2,,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,5,1,Utterance placeholder line 1 2,False,False,False,False,False,True, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,6,0,Utterance placeholder line 0 3,,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,7,1,Utterance placeholder line 1 3,False,False,False,False,False,True, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,8,0,Utterance placeholder line 0 4,,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,9,1,Utterance placeholder line 1 4,False,False,False,False,False,True, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,10,0,Utterance placeholder line 0 5,,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,11,1,Utterance placeholder line 1 5,False,False,False,False,False,True, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,12,0,Utterance placeholder line 0 6,,,,,,, -2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,13,1,Utterance placeholder line 1 6,False,False,False,False,False,True,1 +WORKER_3,HIT_ID_3,blender_90M,2,0,0,Hi!,,,,,,, +WORKER_3,HIT_ID_3,blender_90M,2,1,1,Utterance placeholder line 1 0,False,False,False,False,False,True, +WORKER_3,HIT_ID_3,blender_90M,2,2,0,Utterance placeholder line 0 1,,,,,,, +WORKER_3,HIT_ID_3,blender_90M,2,3,1,Utterance placeholder line 1 1,False,False,False,False,False,True, +WORKER_3,HIT_ID_3,blender_90M,2,4,0,Utterance placeholder line 0 2,,,,,,, +WORKER_3,HIT_ID_3,blender_90M,2,5,1,Utterance placeholder line 1 2,False,False,False,False,False,True, +WORKER_3,HIT_ID_3,blender_90M,2,6,0,Utterance placeholder line 0 3,,,,,,, +WORKER_3,HIT_ID_3,blender_90M,2,7,1,Utterance placeholder line 1 3,False,False,False,False,False,True, +WORKER_3,HIT_ID_3,blender_90M,2,8,0,Utterance placeholder line 0 4,,,,,,, +WORKER_3,HIT_ID_3,blender_90M,2,9,1,Utterance 
placeholder line 1 4,False,False,False,False,False,True, +WORKER_3,HIT_ID_3,blender_90M,2,10,0,Utterance placeholder line 0 5,,,,,,, +WORKER_3,HIT_ID_3,blender_90M,2,11,1,Utterance placeholder line 1 5,False,False,False,False,False,True, +WORKER_3,HIT_ID_3,blender_90M,2,12,0,Utterance placeholder line 0 6,,,,,,, +WORKER_3,HIT_ID_3,blender_90M,2,13,1,Utterance placeholder line 1 6,False,False,False,False,False,True,1 \ No newline at end of file diff --git a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__test_stdout.txt b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__test_stdout.txt index ba49723dc28..78c2a99d53c 100644 --- a/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__test_stdout.txt +++ b/tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis/with_personas_and_buckets__test_stdout.txt @@ -1,9 +1,9 @@ -Date folders: 2020_12_29 Got 3 complete conversation(s) for model: blender_90M 3 complete conversation(s) collected. 0 bad conversation(s). 3 approved conversation(s). -(0 incomplete conversation(s) collected.) +(0 wrong status conversation(s) collected.) +(0 conversation(s) collected with no saved data.) ---blender_90M--- human_utterance_count: 21 human_word_count: 93 (4.43) @@ -29,5 +29,4 @@ WORKER_3 1 WORKER_2 1 1 1.0 2 WORKER_3 1 0 0.0 - -Worker conversation counts: {'WORKER_1': 1, 'WORKER_2': 1, 'WORKER_3': 1} +Worker conversation counts: {'WORKER_1': 1, 'WORKER_2': 1, 'WORKER_3': 1} \ No newline at end of file