From eeab46f0ce7199ee42eb0f4b979d035cc88eca2f Mon Sep 17 00:00:00 2001 From: Moya Chen Date: Tue, 29 Sep 2020 21:20:31 -0700 Subject: [PATCH 1/2] Add taskmaster2 command-line arg for single domain Before this change, `parlai dd -t taskmaster2 --display-verbose` displayed a bunch of sports conversations. After this change, running `parlai dd -t taskmaster2 --display-verbose --domains music` displays music conversations. Tried on a few other domains to validate; also had a print in the `_load_data()` function. Also verified that all domains are used when no argument is passed, and that passing `--domains` multiple times uses only the last one. --- parlai/tasks/taskmaster2/agents.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/parlai/tasks/taskmaster2/agents.py b/parlai/tasks/taskmaster2/agents.py index 04853826091..c6b96ca3ccc 100644 --- a/parlai/tasks/taskmaster2/agents.py +++ b/parlai/tasks/taskmaster2/agents.py @@ -50,6 +50,13 @@ class _Abstract(DialogTeacher): @classmethod def add_cmdline_args(cls, argparser): argparser.add_argument('--include-ontology', type=bool, default=False) + argparser.add_argument( + '--domains', + nargs='+', + default=[], + choices=DOMAINS, + help='Subset of domains to use. Uses last passed in configuration. 
Will use all by default if this value is empty', + ) return argparser def __init__(self, opt: Opt, shared=None): @@ -78,10 +85,10 @@ def _h(self, x): def _normalize_annotation(self, anno): return anno - def _load_data(self, fold): + def _load_data(self, fold, domains): # load up the ontology ontology = {} - for section in DOMAINS: + for section in domains: parts = [] fn = os.path.join(self.dpath, section + '.onto.json') with PathManager.open(fn, 'r') as f: @@ -97,7 +104,7 @@ def _load_data(self, fold): ontology[section] = ' ; '.join(parts) chunks = [] - for section in DOMAINS: + for section in domains: with PathManager.open(os.path.join(self.dpath, section + '.json')) as f: subset = pd.read_json(f) subset['domain'] = section @@ -198,8 +205,9 @@ def custom_evaluation( self.metrics.add(f'{domain}_delex_bleu', bleu_metric) def setup_data(self, fold): + domains = self.opt.get('domains', DOMAINS) + chunks = self._load_data(fold, domains) domains_cnt = Counter() - chunks = self._load_data(fold) for _, row in chunks.iterrows(): domains_cnt[row['domain']] += 1 first = True From 2e1ab8b58bab388e38e2498b85e903974252791e Mon Sep 17 00:00:00 2001 From: Moya Chen Date: Tue, 29 Sep 2020 21:20:31 -0700 Subject: [PATCH 2/2] Add taskmaster2 command-line arg for single domain Before this change, `parlai dd -t taskmaster2 --display-verbose` displayed a bunch of sports conversations. After this change, running `parlai dd -t taskmaster2 --display-verbose --domains music` displays music conversations. Also verified that all domains are used when no argument is passed. 
--- parlai/tasks/taskmaster2/agents.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parlai/tasks/taskmaster2/agents.py b/parlai/tasks/taskmaster2/agents.py index c6b96ca3ccc..d2bbf8bdae1 100644 --- a/parlai/tasks/taskmaster2/agents.py +++ b/parlai/tasks/taskmaster2/agents.py @@ -53,9 +53,9 @@ def add_cmdline_args(cls, argparser): argparser.add_argument( '--domains', nargs='+', - default=[], + default=DOMAINS, choices=DOMAINS, - help='Subset of domains to use. Uses last passed in configuration. Will use all by default if this value is empty', + help='Uses last passed in configuration.', ) return argparser