Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Pearlli model chat analysis refactor #4577

Merged
merged 16 commits into from
Jun 10, 2022
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ class ModelChatResultsCompiler(BaseModelChatResultsCompiler):
@classmethod
def setup_args(cls):
parser = super().setup_args()
parser.add_argument(
'--results-folders', type=str, help='Comma-separated list of result folders'
)
parser.add_argument(
'--model-nickname', type=str, default='', help='name of the model'
)
Expand All @@ -43,6 +46,10 @@ def setup_args(cls):
def __init__(self, opt: Dict[str, Any]):

AbstractTurnAnnotationResultsCompiler.__init__(self, opt)
if 'results_folders' in opt:
self.results_folders = opt['results_folders'].split(',')
else:
self.results_folders = None

# Input args
self.model_nickname = opt['model_nickname']
Expand Down
2 changes: 1 addition & 1 deletion parlai/crowdsourcing/tasks/model_chat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ Note that onboarding is not currently supported with human+model image chat: use

## Analysis

Run `analysis/compile_results.py` to compile and save statistics about collected human+model chats. The `ModelChatResultsCompiler` in that script uses dummy annotation buckets by default; set `--problem-buckets` in order to define your own. Set `--results-folders` to the value of `mephisto.blueprint.chat_data_folder` used when running HITs.
Run `analysis/compile_results.py` to compile and save statistics about collected human+model chats. The `ModelChatResultsCompiler` in that script uses dummy annotation buckets by default; set `--problem-buckets` in order to define your own. Set `--task-name` to choose which task's results to compile.
506 changes: 237 additions & 269 deletions parlai/crowdsourcing/tasks/model_chat/analysis/compile_results.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class TurnAnnotationsStaticResultsCompiler(AbstractTurnAnnotationResultsCompiler
@classmethod
def setup_args(cls):
parser = super().setup_args()
parser.add_argument(
'--results-folders', type=str, help='Comma-separated list of result folders'
)
parser.add_argument(
'--onboarding-in-flight-data-file',
type=str,
Expand All @@ -51,6 +54,12 @@ def setup_args(cls):

def __init__(self, opt: Dict[str, Any]):
super().__init__(opt)

if 'results_folders' in opt:
self.results_folders = opt['results_folders'].split(',')
else:
self.results_folders = None

# Validate problem buckets
if self.use_problem_buckets and 'none_all_good' not in self.problem_buckets:
# The code relies on a catchall "none" category if the user selects no other
Expand Down
7 changes: 0 additions & 7 deletions parlai/crowdsourcing/utils/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,6 @@ class AbstractTurnAnnotationResultsCompiler(AbstractResultsCompiler):
@classmethod
def setup_args(cls):
parser = super().setup_args()
parser.add_argument(
'--results-folders', type=str, help='Comma-separated list of result folders'
)
parser.add_argument(
'--problem-buckets',
type=str,
Expand All @@ -232,10 +229,6 @@ def __init__(self, opt: Opt):
super().__init__(opt)

# Handle inputs
if 'results_folders' in opt:
self.results_folders = opt['results_folders'].split(',')
else:
self.results_folders = None
if opt['problem_buckets'].lower() not in ['', 'none']:
self.use_problem_buckets = True
self.problem_buckets = opt['problem_buckets'].split(',')
Expand Down
51 changes: 44 additions & 7 deletions tests/crowdsourcing/tasks/model_chat/test_model_chat_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
"""

import glob
import json
import os
import re
from typing import Any, Dict, List

import pytest
from pytest_regressions.file_regression import FileRegressionFixture
Expand All @@ -23,6 +26,42 @@
)
from parlai.crowdsourcing.utils.tests import check_stdout

class TestModelChatResultsCompiler(ModelChatResultsCompiler):
    """
    Results compiler that reads sample conversation files from disk.

    `get_task_data()` builds its records from JSON files found under
    `self.results_folder`, so the analysis code can be exercised against saved
    sample conversations. `self.results_folder` is read but never assigned in this
    class; the caller must set it on the instance before compiling results.
    """

    # The "Test" prefix matches pytest's default class-collection pattern. Opt out
    # explicitly so that pytest does not try to collect this helper as a test class.
    __test__ = False

    def get_task_data(self) -> List[Dict[str, Any]]:
        """
        Return one fake task-data record per conversation file on disk.

        Only direct subfolders of `self.results_folder` whose names look like
        `YYYY_MM_DD` are read. Folders and the files inside them are visited in
        sorted order so that the output is deterministic. Every file in a matching
        folder is parsed as JSON.

        Each record holds the parsed file under `data.save_data.custom_data`, the
        first entry of the file's `workers` and `assignment_ids` lists, and a
        hard-coded status of `'completed'`.
        NOTE(review): this record layout presumably mirrors what the parent
        compiler expects from its normal data source - confirm against the parent.
        """
        # Load paths
        date_strings = sorted(
            obj
            for obj in os.listdir(self.results_folder)
            if os.path.isdir(os.path.join(self.results_folder, obj))
            and re.fullmatch(r'\d\d\d\d_\d\d_\d\d', obj)
        )

        fake_jsons = []
        for date_string in date_strings:
            read_folder = os.path.join(self.results_folder, date_string)
            for file_name in sorted(os.listdir(read_folder)):
                # Read in file
                with open(os.path.join(read_folder, file_name), 'rb') as f:
                    data = json.load(f)
                fake_jsons.append(
                    {
                        'data': {'save_data': {'custom_data': data}},
                        'worker_id': data['workers'][0],
                        'assignment_id': data['assignment_ids'][0],
                        'status': 'completed',
                    }
                )

        return fake_jsons

class TestCompileResults:
"""
Test the analysis code for the model chat task.
Expand Down Expand Up @@ -62,14 +101,12 @@ def setup_teardown(self):

# Run analysis
with testing_utils.capture_output() as output:
arg_string = f"""\
--results-folders {analysis_samples_folder}
--output-folder {tmpdir} \
{flag_string}
"""
parser_ = ModelChatResultsCompiler.setup_args()
arg_string = f"""--output-folder {tmpdir} {flag_string}"""
parser_ = TestModelChatResultsCompiler.setup_args()
args_ = parser_.parse_args(arg_string.split())
ModelChatResultsCompiler(vars(args_)).compile_and_save_results()
compiler = TestModelChatResultsCompiler(vars(args_))
compiler.results_folder = analysis_samples_folder
pearlli98 marked this conversation as resolved.
Show resolved Hide resolved
compiler.compile_and_save_results()
stdout = output.getvalue()

# Define output structure
Expand Down
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
folder,worker_id,hit_id,model_nickname,conversation_idx,turn_idx,agent_idx,text,final_rating
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,-1,1,,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,0,0,Hi!,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,1,1,Utterance placeholder line 1 0,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,2,0,Utterance placeholder line 0 1,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,3,1,Utterance placeholder line 1 1,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,4,0,Utterance placeholder line 0 2,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,5,1,Utterance placeholder line 1 2,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,6,0,Utterance placeholder line 0 3,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,7,1,Utterance placeholder line 1 3,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,8,0,Utterance placeholder line 0 4,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,9,1,Utterance placeholder line 1 4,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,10,0,Utterance placeholder line 0 5,
2020_12_29,WORKER_1,HIT_ID_1,blender_90M,0,11,1,Utterance placeholder line 1 5,1
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,-1,1,,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,0,0,Hi!,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,1,1,Utterance placeholder line 1 0,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,2,0,Utterance placeholder line 0 1,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,3,1,Utterance placeholder line 1 1,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,4,0,Utterance placeholder line 0 2,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,5,1,Utterance placeholder line 1 2,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,6,0,Utterance placeholder line 0 3,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,7,1,Utterance placeholder line 1 3,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,8,0,Utterance placeholder line 0 4,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,9,1,Utterance placeholder line 1 4,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,10,0,Utterance placeholder line 0 5,
2020_12_29,WORKER_2,HIT_ID_2,blender_90M,1,11,1,Utterance placeholder line 1 5,5
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,-1,1,,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,0,0,Hi!,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,1,1,Utterance placeholder line 1 0,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,2,0,Utterance placeholder line 0 1,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,3,1,Utterance placeholder line 1 1,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,4,0,Utterance placeholder line 0 2,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,5,1,Utterance placeholder line 1 2,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,6,0,Utterance placeholder line 0 3,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,7,1,Utterance placeholder line 1 3,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,8,0,Utterance placeholder line 0 4,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,9,1,Utterance placeholder line 1 4,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,10,0,Utterance placeholder line 0 5,
2020_12_29,WORKER_3,HIT_ID_3,blender_90M,2,11,1,Utterance placeholder line 1 5,1
worker_id,hit_id,model_nickname,conversation_idx,turn_idx,agent_idx,text,final_rating
WORKER_1,HIT_ID_1,blender_90M,0,-1,1,,
WORKER_1,HIT_ID_1,blender_90M,0,0,0,Hi!,
WORKER_1,HIT_ID_1,blender_90M,0,1,1,Utterance placeholder line 1 0,
WORKER_1,HIT_ID_1,blender_90M,0,2,0,Utterance placeholder line 0 1,
WORKER_1,HIT_ID_1,blender_90M,0,3,1,Utterance placeholder line 1 1,
WORKER_1,HIT_ID_1,blender_90M,0,4,0,Utterance placeholder line 0 2,
WORKER_1,HIT_ID_1,blender_90M,0,5,1,Utterance placeholder line 1 2,
WORKER_1,HIT_ID_1,blender_90M,0,6,0,Utterance placeholder line 0 3,
WORKER_1,HIT_ID_1,blender_90M,0,7,1,Utterance placeholder line 1 3,
WORKER_1,HIT_ID_1,blender_90M,0,8,0,Utterance placeholder line 0 4,
WORKER_1,HIT_ID_1,blender_90M,0,9,1,Utterance placeholder line 1 4,
WORKER_1,HIT_ID_1,blender_90M,0,10,0,Utterance placeholder line 0 5,
WORKER_1,HIT_ID_1,blender_90M,0,11,1,Utterance placeholder line 1 5,1
WORKER_2,HIT_ID_2,blender_90M,1,-1,1,,
WORKER_2,HIT_ID_2,blender_90M,1,0,0,Hi!,
WORKER_2,HIT_ID_2,blender_90M,1,1,1,Utterance placeholder line 1 0,
WORKER_2,HIT_ID_2,blender_90M,1,2,0,Utterance placeholder line 0 1,
WORKER_2,HIT_ID_2,blender_90M,1,3,1,Utterance placeholder line 1 1,
WORKER_2,HIT_ID_2,blender_90M,1,4,0,Utterance placeholder line 0 2,
WORKER_2,HIT_ID_2,blender_90M,1,5,1,Utterance placeholder line 1 2,
WORKER_2,HIT_ID_2,blender_90M,1,6,0,Utterance placeholder line 0 3,
WORKER_2,HIT_ID_2,blender_90M,1,7,1,Utterance placeholder line 1 3,
WORKER_2,HIT_ID_2,blender_90M,1,8,0,Utterance placeholder line 0 4,
WORKER_2,HIT_ID_2,blender_90M,1,9,1,Utterance placeholder line 1 4,
WORKER_2,HIT_ID_2,blender_90M,1,10,0,Utterance placeholder line 0 5,
WORKER_2,HIT_ID_2,blender_90M,1,11,1,Utterance placeholder line 1 5,5
WORKER_3,HIT_ID_3,blender_90M,2,-1,1,,
WORKER_3,HIT_ID_3,blender_90M,2,0,0,Hi!,
WORKER_3,HIT_ID_3,blender_90M,2,1,1,Utterance placeholder line 1 0,
WORKER_3,HIT_ID_3,blender_90M,2,2,0,Utterance placeholder line 0 1,
WORKER_3,HIT_ID_3,blender_90M,2,3,1,Utterance placeholder line 1 1,
WORKER_3,HIT_ID_3,blender_90M,2,4,0,Utterance placeholder line 0 2,
WORKER_3,HIT_ID_3,blender_90M,2,5,1,Utterance placeholder line 1 2,
WORKER_3,HIT_ID_3,blender_90M,2,6,0,Utterance placeholder line 0 3,
WORKER_3,HIT_ID_3,blender_90M,2,7,1,Utterance placeholder line 1 3,
WORKER_3,HIT_ID_3,blender_90M,2,8,0,Utterance placeholder line 0 4,
WORKER_3,HIT_ID_3,blender_90M,2,9,1,Utterance placeholder line 1 4,
WORKER_3,HIT_ID_3,blender_90M,2,10,0,Utterance placeholder line 0 5,
WORKER_3,HIT_ID_3,blender_90M,2,11,1,Utterance placeholder line 1 5,1
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Date folders: 2020_12_29
Got 3 complete conversation(s) for model: blender_90M
3 complete conversation(s) collected.
0 bad conversation(s).
3 approved conversation(s).
(0 incomplete conversation(s) collected.)
(0 wrong status conversation(s) collected.)
(0 conversation(s) collected with no saved data.)
---blender_90M---
human_utterance_count: 18
human_word_count: 78 (4.33)
Expand All @@ -22,4 +22,4 @@ WORKER_3
1 WORKER_2 1
2 WORKER_3 1

Worker conversation counts: {'WORKER_1': 1, 'WORKER_2': 1, 'WORKER_3': 1}
Worker conversation counts: {'WORKER_1': 1, 'WORKER_2': 1, 'WORKER_3': 1}
Loading