From b83df45a537aef38fb5baa9ab02676c4c3672d9e Mon Sep 17 00:00:00 2001
From: Jing Xu
Date: Mon, 14 Jun 2021 22:39:07 +0000
Subject: [PATCH 1/3] log worlds per task

---
 parlai/scripts/eval_model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/parlai/scripts/eval_model.py b/parlai/scripts/eval_model.py
index 289dc636372..6015759a564 100644
--- a/parlai/scripts/eval_model.py
+++ b/parlai/scripts/eval_model.py
@@ -159,12 +159,12 @@ def _eval_single_world(opt, agent, task):
     if world_logger is not None:
         # dump world acts to file
         world_logger.reset()  # add final acts to logs
+        base_outfile, extension = os.path.splitext(opt['world_logs'])
         if is_distributed():
             rank = get_rank()
-            base_outfile, extension = os.path.splitext(opt['world_logs'])
-            outfile = base_outfile + f'_{rank}' + extension
+            outfile = base_outfile + f'_{task}_{rank}' + extension
         else:
-            outfile = opt['world_logs']
+            outfile = base_outfile + f'_{task}' + extension
         world_logger.write(outfile, world, file_format=opt['save_format'])
 
     report = aggregate_unnamed_reports(all_gather_list(world.report()))

From 50da6a67152f601e06981051c4db61c41b8ac3d3 Mon Sep 17 00:00:00 2001
From: Jing Xu
Date: Wed, 7 Jul 2021 15:42:33 +0000
Subject: [PATCH 2/3] test

---
 parlai/scripts/eval_model.py | 24 +++++++++++++++++++-----
 tests/test_eval_model.py     | 29 +++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/parlai/scripts/eval_model.py b/parlai/scripts/eval_model.py
index 6015759a564..f2be995fdd8 100644
--- a/parlai/scripts/eval_model.py
+++ b/parlai/scripts/eval_model.py
@@ -118,13 +118,27 @@ def _save_eval_stats(opt, report):
             f.write("\n")  # for jq
 
 
+def get_task_world_logs(task, world_logs, is_multitask=False):
+    if not is_multitask:
+        return world_logs
+    else:
+        base_outfile, extension = os.path.splitext(world_logs)
+        return f'{base_outfile}_{task}{extension}'
+
+
 def _eval_single_world(opt, agent, task):
     logging.info(f'Evaluating task {task} using datatype {opt.get("datatype")}.')
     # set up world logger
-    world_logger = WorldLogger(opt) if opt['world_logs'] else None
-
     task_opt = opt.copy()  # copy opt since we're editing the task
     task_opt['task'] = task
+    # add task suffix in case of multi-tasking
+    if opt['world_logs']:
+        task_opt['world_logs'] = get_task_world_logs(
+            task, task_opt['world_logs'], is_multitask=True
+        )
+
+    world_logger = WorldLogger(task_opt) if task_opt['world_logs'] else None
+
     world = create_task(task_opt, agent)  # create worlds for tasks
 
     # set up logging
@@ -159,12 +173,12 @@ def _eval_single_world(opt, agent, task):
     if world_logger is not None:
         # dump world acts to file
         world_logger.reset()  # add final acts to logs
-        base_outfile, extension = os.path.splitext(opt['world_logs'])
         if is_distributed():
             rank = get_rank()
-            outfile = base_outfile + f'_{task}_{rank}' + extension
+            base_outfile, extension = os.path.splitext(task_opt['world_logs'])
+            outfile = base_outfile + f'_{rank}' + extension
         else:
-            outfile = base_outfile + f'_{task}' + extension
+            outfile = task_opt['world_logs']
         world_logger.write(outfile, world, file_format=opt['save_format'])
 
     report = aggregate_unnamed_reports(all_gather_list(world.report()))
diff --git a/tests/test_eval_model.py b/tests/test_eval_model.py
index 6ac604b8476..5ff0c2e9291 100644
--- a/tests/test_eval_model.py
+++ b/tests/test_eval_model.py
@@ -8,6 +8,7 @@
 import pytest
 import unittest
 import parlai.utils.testing as testing_utils
+from parlai.scripts.eval_model import get_task_world_logs
 
 
 class TestEvalModel(unittest.TestCase):
@@ -227,6 +228,34 @@ def test_save_report(self):
                 json_lines = f.readlines()
             assert len(json_lines) == 100
 
+    def test_save_multiple_reports(self):
+        """
+        Test that we can save report from eval model.
+        """
+        with testing_utils.tempdir() as tmpdir:
+            log_report = os.path.join(tmpdir, 'world_logs.jsonl')
+            save_report = os.path.join(tmpdir, 'report')
+            multitask = 'integration_tests,blended_skill_talk'
+            opt = dict(
+                task=multitask,
+                model='repeat_label',
+                datatype='valid',
+                batchsize=97,
+                num_examples=100,
+                display_examples=False,
+                world_logs=log_report,
+                report_filename=save_report,
+            )
+            valid, test = testing_utils.eval_model(opt)
+
+            for task in multitask.split(','):
+                task_log_report = get_task_world_logs(
+                    task, log_report, is_multitask=True
+                )
+                with PathManager.open(task_log_report) as f:
+                    json_lines = f.readlines()
+                assert len(json_lines) == 100
+
 
 if __name__ == '__main__':
     unittest.main()

From d8c59112f1bc78f741437595d9de0c990a04fb26 Mon Sep 17 00:00:00 2001
From: Jing Xu
Date: Wed, 7 Jul 2021 15:52:15 +0000
Subject: [PATCH 3/3] doc string

---
 parlai/scripts/eval_model.py | 2 +-
 tests/test_eval_model.py     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/parlai/scripts/eval_model.py b/parlai/scripts/eval_model.py
index f2be995fdd8..f131392d57c 100644
--- a/parlai/scripts/eval_model.py
+++ b/parlai/scripts/eval_model.py
@@ -134,7 +134,7 @@ def _eval_single_world(opt, agent, task):
     # add task suffix in case of multi-tasking
     if opt['world_logs']:
         task_opt['world_logs'] = get_task_world_logs(
-            task, task_opt['world_logs'], is_multitask=True
+            task, task_opt['world_logs'], is_multitask=len(opt['task'].split(',')) > 1
         )
 
     world_logger = WorldLogger(task_opt) if task_opt['world_logs'] else None
diff --git a/tests/test_eval_model.py b/tests/test_eval_model.py
index 5ff0c2e9291..7a0e8328809 100644
--- a/tests/test_eval_model.py
+++ b/tests/test_eval_model.py
@@ -228,9 +228,9 @@ def test_save_report(self):
                 json_lines = f.readlines()
             assert len(json_lines) == 100
 
-    def test_save_multiple_reports(self):
+    def test_save_multiple_logs(self):
         """
-        Test that we can save report from eval model.
+        Test that we can save multiple world_logs from eval model on multiple tasks.
        """
         with testing_utils.tempdir() as tmpdir:
             log_report = os.path.join(tmpdir, 'world_logs.jsonl')
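
Illustration (not part of the patch series): a minimal standalone sketch of the per-task log naming these patches introduce. Only get_task_world_logs is copied from PATCH 2/3, with the is_multitask check from PATCH 3/3; the example paths and the __main__ driver are hypothetical.

import os


def get_task_world_logs(task, world_logs, is_multitask=False):
    # Same helper as added in PATCH 2/3: single-task runs keep the requested
    # path, multi-task runs insert a per-task suffix before the extension.
    if not is_multitask:
        return world_logs
    else:
        base_outfile, extension = os.path.splitext(world_logs)
        return f'{base_outfile}_{task}{extension}'


if __name__ == '__main__':
    # Hypothetical example values, not taken from the PR.
    world_logs = '/tmp/world_logs.jsonl'
    tasks = 'integration_tests,blended_skill_talk'.split(',')
    is_multitask = len(tasks) > 1  # the check PATCH 3/3 introduces
    for task in tasks:
        print(get_task_world_logs(task, world_logs, is_multitask=is_multitask))
    # prints:
    #   /tmp/world_logs_integration_tests.jsonl
    #   /tmp/world_logs_blended_skill_talk.jsonl

Under distributed evaluation, _eval_single_world additionally appends a _{rank} suffix to each per-task file, as shown in the second hunk of PATCH 2/3.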