diff --git a/parlai/core/teachers.py b/parlai/core/teachers.py
index 1b37ee7c5c7..86cdd3e99a3 100644
--- a/parlai/core/teachers.py
+++ b/parlai/core/teachers.py
@@ -1067,6 +1067,7 @@ def __init__(self, opt, data_loader=None, cands=None, shared=None, **kwargs):
             self.reset_data = shared['reset']
             # Share datafile and data_loader for computing num_exs and num_eps
             self.datafile = shared['datafile']
+            self.length_datafile = opt.get('length_datafile', None)
             self.data_loader = shared['data_loader']
             if 'lock' in shared:
                 self.lock = shared['lock']
@@ -1078,6 +1079,7 @@ def __init__(self, opt, data_loader=None, cands=None, shared=None, **kwargs):
                     ERROR_MESSAGE_NO_DATAFILE.format(class_name=self.__class__.__name__)
                 )
             self.datafile = opt['datafile']
+            self.length_datafile = opt.get('length_datafile', None)
             self.reset_data = None
             self.is_reset = True
         self.entry_idx = 0
@@ -1137,8 +1139,13 @@ def load_length(self):
         Note that this can take some time for large datasets. Episode and entry indexes
         cannot be specified during streaming.
         """
-        datafiles = self.datafile if type(self.datafile) is tuple else [self.datafile]
-        length_file = datafiles[0] + ".lengths"
+        if self.length_datafile:
+            length_file = self.length_datafile
+        else:
+            datafiles = (
+                self.datafile if type(self.datafile) is tuple else [self.datafile]
+            )
+            length_file = datafiles[0] + ".lengths"
         if not PathManager.exists(length_file):
             num_eps = 0
             num_exs = 0
diff --git a/parlai/tasks/blended_skill_talk/agents.py b/parlai/tasks/blended_skill_talk/agents.py
index 06341d84f34..ef44b2dccf5 100644
--- a/parlai/tasks/blended_skill_talk/agents.py
+++ b/parlai/tasks/blended_skill_talk/agents.py
@@ -158,8 +158,8 @@ def __init__(self, opt, shared=None):
         )
         super().__init__(opt, shared=shared)
 
-    def get(self, episode_idx, entry_idx=None):
-        gotten = super().get(episode_idx, entry_idx=entry_idx)
+    def _format_example(self, episode_idx, entry_idx=None):
+        gotten = super()._format_example(episode_idx, entry_idx)
         if entry_idx == 0:
             modified_text = self.persona_topicifier.get_modified_text(gotten['text'])
             gotten.force_set('text', modified_text)
diff --git a/parlai/tasks/blended_skill_talk/test/blended_skill_talk_wo_w_persona_topicifier_train.yml b/parlai/tasks/blended_skill_talk/test/blended_skill_talk_wo_w_persona_topicifier_train.yml
index 25543713142..8d3cb101202 100644
--- a/parlai/tasks/blended_skill_talk/test/blended_skill_talk_wo_w_persona_topicifier_train.yml
+++ b/parlai/tasks/blended_skill_talk/test/blended_skill_talk_wo_w_persona_topicifier_train.yml
@@ -34,7 +34,6 @@ acts:
       realization tomorrow...
 
       '
-    label_candidates: []
     labels:
     - I think science fiction is an amazing genre for anything. Future science, technology,
       time travel, FTL travel, they're all such interesting concepts.
@@ -238,7 +237,6 @@ acts:
       most exist within the city built by the Ancients known as Atlantis.
 
       '
-    label_candidates: []
     labels:
     - Awesome! I really love how sci-fi storytellers focus on political/social/philosophical
       issues that would still be around even in the future. Makes them relatable.
@@ -471,7 +469,6 @@ acts:
       and Michael Giacchino, respectively.
 
       '
-    label_candidates: []
     labels:
     - It's not quite sci-fi, but my favorite version of time travel is in Harry Potter
       and the Prisoner of Azkaban. Breaks zero logical rules.
@@ -697,7 +694,6 @@ acts:
       Harry Potter, Ron Weasley, and Hermione Granger.
 
       '
-    label_candidates: []
     labels:
     - If you really want a look at the potential negative consequences of scientific
       innovation, what you should check out is the TV show Fringe. Incredibly well
@@ -836,7 +832,6 @@ acts:
       assumed or defined as requirements.
 
       '
-    label_candidates: []
     labels:
     - No I could not! I couldn't imagine living when internet access was rare and
       very few people had it!
diff --git a/parlai/tasks/wizard_of_wikipedia/agents.py b/parlai/tasks/wizard_of_wikipedia/agents.py
index 62da4b802f8..45dc3fb9bbc 100644
--- a/parlai/tasks/wizard_of_wikipedia/agents.py
+++ b/parlai/tasks/wizard_of_wikipedia/agents.py
@@ -16,31 +16,18 @@
 """
 
 from __future__ import annotations
-from typing import Iterable, Optional, Tuple
+from typing import Iterable, Optional, Tuple, Dict, Any
 from parlai.core.message import Message
 from parlai.core.metrics import AverageMetric, normalize_answer, F1Metric
 from parlai.core.params import ParlaiParser
 from parlai.core.opt import Opt
 import copy
-from parlai.core.teachers import FixedDialogTeacher, MultiTaskTeacher
+from parlai.core.teachers import DialogTeacher, MultiTaskTeacher
 from parlai.utils.io import PathManager
 from parlai.utils import logging
 from parlai.utils.misc import warn_once
 from .build import build
-from .mutators import (
-    AddCheckedSentence,
-    CheckedSentenceAsLabel,
-    AddLabel,
-    AddLabelLM,
-    WowFilterNoPassageUsed,
-)
-import parlai.tasks.wizard_of_internet.mutators
-
-# agents import (
-#    WoiDropoutRetrievedDocs,
-#    WoiChunkRetrievedDocs,
-#    WoiFilterSelectedKnowledgeInRetrievedDocs,
-# )
+import parlai.tasks.wizard_of_internet.mutators  # type: ignore
 
 import json
 import os
@@ -186,7 +173,17 @@ def _build_rare_word_f1(datapath: str) -> RareWordF1Calculator:
     return RareWordF1Calculator(all_text, top_p=0.5)
 
 
-class WizardOfWikipediaTeacher(FixedDialogTeacher):
+def _get_datafile(opt: Opt) -> str:
+    """
+    Datafile for the split in a 3-field task string; ``random_split`` otherwise.
+    """
+    task = opt.get('task', 'wizard_of_wikipedia:WizardOfWikipedia:random_split')
+    fields = task.split(':')
+    split_name = fields[2] if len(fields) == 3 else 'random_split'
+    return _path(opt, split=split_name)
+
+
+class WizardOfWikipediaTeacher(DialogTeacher):
     """
     The default teacher; essentially reads the json file and outputs the raw data.
 
@@ -211,40 +208,32 @@ class WizardOfWikipediaTeacher(FixedDialogTeacher):
     """
 
     def __init__(self, opt, shared=None):
-        super().__init__(opt, shared)
-        self.opt = opt
         task = opt.get('task', 'wizard_of_wikipedia:WizardOfWikipedia:random_split')
-        split = task.split(':')
-        split = split[2] if len(split) == 3 else 'random_split'
-        opt['task'] = 'wizard_of_wikipedia'
-        if shared and 'data' in shared:
-            self.data = shared['data']
-        else:
-            self.data_path = _path(opt, split=split)
-            self._setup_data()
-        self.num_exs = sum(len(d['dialog']) for d in self.data)
-        self.reset()
-
-    def _setup_data(self):
-        print('loading: ' + self.data_path)
-        with PathManager.open(self.data_path) as f:
-            self.data = json.load(f)
-
-    def num_episodes(self):
-        return len(self.data)
-
-    def num_examples(self):
-        return self.num_exs
-
-    def get(self, episode_idx, entry_idx=0):
-        d = self.data[episode_idx]
-        dialog_entry = d['dialog'][entry_idx]
-        episode_done = entry_idx == len(d['dialog']) - 1
+        opt['datafile'] = _get_datafile(opt)
+        opt['length_datafile'] = f"{opt['datafile']}_{task.split(':')[:2][-1]}"
+        super().__init__(opt, shared)
+
+    def setup_data(self, datafile):
+        logging.info(f'loading {datafile}')
+        with PathManager.open(datafile) as json_file:
+            self.raw_data = json.load(json_file)
+        for ep_idx in range(len(self.raw_data)):
+            for turn_idx in range(self.len_episode(ep_idx)):
+                msg = self._format_example(ep_idx, turn_idx)
+                msg.pop('episode_done', '')
+                if 'label_candidates' in msg and not msg['label_candidates']:
+                    msg.pop('label_candidates')  # drop empty candidate lists
+                yield msg, turn_idx == 0  # flag is True on an episode's first turn
+
+    def _format_example(self, episode_idx: int, entry_idx: int) -> Message:
+        episode = self.raw_data[episode_idx]
+        dialog_entry = episode['dialog'][entry_idx]
+        episode_done = entry_idx == len(episode['dialog']) - 1
         action = Message(
             {
-                'wizard_eval': d['wizard_eval'],
-                'chosen_topic': d['chosen_topic'],
-                'chosen_topic_passage': d['chosen_topic_passage'],
+                'wizard_eval': episode['wizard_eval'],
+                'chosen_topic': episode['chosen_topic'],
+                'chosen_topic_passage': episode['chosen_topic_passage'],
                 'text': dialog_entry['text'],
                 'retrieved_topics': dialog_entry['retrieved_topics'],
                 'retrieved_passages': dialog_entry['retrieved_passages'],
@@ -256,10 +245,13 @@ def get(self, episode_idx, entry_idx=0):
 
         return action
 
-    def share(self):
-        shared = super().share()
-        shared['data'] = self.data
-        return shared
+    def len_episode(self, ep: int) -> int:
+        """
+        Count the entries in the ``dialog`` list of raw episode ``ep``.
+        """
+        # Subclasses in this file replace this with their own turn counting.
+        dialog = self.raw_data[ep]['dialog']
+        return len(dialog)
 
 
 ###############################################################
@@ -291,18 +283,12 @@ class WizardDialogKnowledgeTeacher(WizardOfWikipediaTeacher):
     """
 
     def __init__(self, opt, shared=None):
-        self.add_missing_turns = opt.get('add_missing_turns', 'none')
-        super().__init__(opt, shared)
-        self.label_type = opt.get('label_type', 'response')
-        self.include_knowledge = opt.get('include_knowledge', True)
-        self.include_checked_sentence = opt.get('include_checked_sentence', False)
-        self.knowledge_separator = opt.get('include_knowledge_separator', False)
-        self.chosen_topic_delimiter = opt.get('chosen_topic_delimiter', '\n')
-        self.num_exs = sum(self.len_episode(i) for i in range(len(self.data)))
+        self._init_attributes(opt)
         if shared and 'rare_word_f1' in shared:
             self.rare_word_f1 = shared['rare_word_f1']
         elif self.label_type == 'response':
             self.rare_word_f1 = _build_rare_word_f1(opt['datapath'])
+        super().__init__(opt, shared)
 
     @classmethod
     def add_cmdline_args(
@@ -360,6 +346,17 @@ def add_cmdline_args(
         )
         return parser
 
+    def _init_attributes(self, opt: Opt):
+        """
+        Copy the knowledge-teacher settings out of ``opt`` onto the instance.
+        """
+        self.label_type = opt.get('label_type', 'response')
+        self.add_missing_turns = opt.get('add_missing_turns', 'none')
+        self.chosen_topic_delimiter = opt.get('chosen_topic_delimiter', '\n')
+        self.knowledge_separator = opt.get('include_knowledge_separator', False)
+        self.include_checked_sentence = opt.get('include_checked_sentence', False)
+        self.include_knowledge = opt.get('include_knowledge', True)
+
     def share(self):
         shared = super().share()
         if hasattr(self, 'rare_word_f1'):
@@ -367,7 +364,7 @@ def share(self):
         return shared
 
     def len_episode(self, ep):
-        d = self.data[ep]
+        d = self.raw_data[ep]
         wizard_first = 'Wizard' in d['dialog'][0]['speaker']
         if wizard_first:
             if self.add_missing_turns == 'none':
@@ -384,11 +381,8 @@ def len_episode(self, ep):
             return len_ep
         return len(d['dialog']) // 2
 
-    def num_examples(self):
-        return self.num_exs
-
-    def get(self, episode_idx, entry_idx=0):
-        d = self.data[episode_idx]
+    def _format_example(self, episode_idx, entry_idx=0):
+        d = self.raw_data[episode_idx]
         episode_done = entry_idx == (self.len_episode(episode_idx) - 1)
 
         wizard_first = 'Wizard' in d['dialog'][0]['speaker']
@@ -511,7 +505,7 @@ def custom_evaluation(
                     model_response['text'], [teacher_action['checked_sentence']]
                 ),
             )
-            if labels:
+            if labels and hasattr(self, 'rare_word_f1'):
                 self.metrics.add(
                     'rare_word_f1',
                     self.rare_word_f1.compute(model_response['text'], labels),
@@ -565,10 +559,9 @@ class BasicdialogTeacher(WizardOfWikipediaTeacher):
 
     def __init__(self, opt, shared=None):
         self.add_missing_turns = opt.get('add_missing_turns', 'none')
-        super().__init__(opt, shared)
         self.speaker_label = opt.get('speaker_label', 'both')
         self.add_topic = opt.get('add_topic', False)
-        self.num_exs = sum(self.len_episode(i) for i in range(len(self.data)))
+        super().__init__(opt, shared)
 
     @classmethod
     def add_cmdline_args(
@@ -600,11 +593,8 @@ def add_cmdline_args(
         )
         return parser
 
-    def num_examples(self):
-        return self.num_exs
-
     def len_episode(self, ep):
-        d = self.data[ep]
+        d = self.raw_data[ep]
         first_speaker = d['dialog'][0]['speaker'].lower()
         if self.speaker_label != 'both' and self.speaker_label in first_speaker:
             if self.add_missing_turns == 'none':
@@ -621,8 +611,8 @@ def len_episode(self, ep):
             return len_ep
         return len(d['dialog']) // 2
 
-    def get(self, episode_idx, entry_idx=0):
-        d = self.data[episode_idx]
+    def _format_example(self, episode_idx, entry_idx=0):
+        d = self.raw_data[episode_idx]
         episode_done = entry_idx == (self.len_episode(episode_idx) - 1)
 
         idx = entry_idx * 2
@@ -696,12 +686,12 @@ class GeneratorTeacher(WizardDialogKnowledgeTeacher):
     def __init__(self, opt, shared=None):
         opt['label_type'] = 'response'
         opt['include_checked_sentence'] = True
-        super().__init__(opt, shared)
-        self.knowledge_separator = opt.get('include_knowledge_separator', True)
+        opt.setdefault('include_knowledge_separator', True)  # parent init re-reads opt
         self.only_checked_knowledge = opt.get('only_checked_knowledge', False)
         self.prepend_gold_knowledge = opt.get('prepend_gold_knowledge')
         self.gold_knowledge_delimiter = opt.get('gold_knowledge_delimiter', '\n')
         self.dropout = opt.get('ignorant_dropout', 0.0)
+        super().__init__(opt, shared)
 
     @classmethod
     def add_cmdline_args(
@@ -740,8 +730,8 @@ def add_cmdline_args(
     def getID(self):
         return "WizTeacher"
 
-    def get(self, episode_idx, entry_idx=0):
-        a = super().get(episode_idx, entry_idx)
+    def _format_example(self, episode_idx, entry_idx=0):
+        a = super()._format_example(episode_idx, entry_idx)
         # zero out the label candidates?
         if 'knowledge' not in a:
             # just a batch padding item
@@ -798,8 +788,8 @@ class WikiPageTitleTeacher(WizardDialogKnowledgeTeacher):
 
     def __init__(self, opt, shared=None):
         self.opt = copy.deepcopy(opt)
+        self._init_attributes(opt)
         self.opt['label_type'] = 'response'
-        super().__init__(self.opt, shared=shared)
         self.id = 'WikiPageTitleTeacher'
         self._conv_history_len = self.opt['conversation_history_length']
         if not (self._conv_history_len > 0 or self._conv_history_len == -1):
@@ -809,10 +799,7 @@ def __init__(self, opt, shared=None):
             )
             self._conv_history_len = -1
         self._skip_no_title = self.opt['skip_no_title']
-        if not shared:
-            self._preprocess_data()
-        else:
-            self.titles_data = shared['titles_data']
+        super().__init__(self.opt, shared=shared)
 
     @classmethod
     def add_cmdline_args(cls, parser, partial_opt=None):
@@ -857,13 +844,25 @@ def _generate_messages(self, hist, action):
     def _should_include(self, act):
         return not (self._skip_no_title and act['labels'][0] == TOKEN_NOCHOSEN)
 
+    def setup_data(self, datafile):
+        logging.info(f'loading {datafile}')
+        with PathManager.open(datafile) as json_file:
+            self.raw_data = json.load(json_file)
+        self._preprocess_data()  # fills self.titles_data from self.raw_data
+        for title_idx in range(len(self.titles_data)):
+            for turn_idx in range(self.len_episode(title_idx)):
+                msg = self._format_example(title_idx, turn_idx)
+                msg.pop('episode_done', '')
+                if 'label_candidates' in msg and not msg['label_candidates']:
+                    msg.pop('label_candidates')  # drop empty candidate lists
+                yield msg, turn_idx == 0
+
     def _preprocess_data(self):
         data = []
-        for episode_idx in range(super().num_episodes()):
+        for episode_idx in range(len(self.raw_data)):
             dialog_history = []
-            ex_idx = 0
-            while True:
-                a = super().get(episode_idx, ex_idx)
+            for ex_idx in range(super().len_episode(episode_idx)):
+                a = super()._format_example(episode_idx, ex_idx)
                 text_parts = a['text'].split('\n')
                 if ex_idx == 0:
                     # throwing away chosen_topic
@@ -873,24 +872,15 @@ def _preprocess_data(self):
                     title_act = self._generate_messages(dialog_history, a)
                     if self._should_include(title_act):
                         data.append(title_act)
-                if a['episode_done']:
-                    break
-                ex_idx += 1
                 dialog_history.append(a['labels'][0])
 
-        logging.info(
-            f'{len(data)} title generation examples generated '
-            f'from {super().num_examples()} original examples'
-        )
+        logging.info(f'{len(data)} title generation examples generated ')
         self.titles_data = data
 
-    def num_episodes(self):
-        return len(self.titles_data)
-
-    def num_examples(self):
-        return self.num_episodes()
+    def len_episode(self, ep):
+        return 1  # every titles_data entry is emitted as a one-turn episode
 
-    def get(self, episode_idx, entry_idx=0):
+    def _format_example(self, episode_idx, entry_idx=0):
         return self.titles_data[episode_idx]
 
 
@@ -940,19 +930,7 @@ class DocreaderTeacher(WizardOfWikipediaTeacher):
     """
 
     def __init__(self, opt, shared=None):
-        super().__init__(opt, shared)
 
-        # get number of examples
-        self.num_exs = 0
-        for ep in range(self.num_episodes()):
-            d = self.data[ep]
-            for entry in d['dialog']:
-                if (
-                    entry.get('checked_sentence', None) is not None
-                    and entry.get('checked_sentence') != {}
-                    and TOKEN_NOCHOSEN not in entry.get('checked_sentence')
-                ):
-                    self.num_exs += 1
         self.stop_words = [
             'i',
             'a',
@@ -1057,6 +1035,7 @@ def __init__(self, opt, shared=None):
             self.sent_tok = nltk.data.load(st_path)
 
         self.teacher_type = opt.get('teacher_type')
+        super().__init__(opt, shared)
 
     @classmethod
     def add_cmdline_args(
@@ -1139,9 +1118,6 @@ def get_span(self, one, two):
             )
         return max_span
 
-    def num_examples(self):
-        return self.num_exs
-
     def length_episode(self, dialog):
         len_ep = 0
         idxs = []
@@ -1158,8 +1134,11 @@ def length_episode(self, dialog):
 
         return len_ep, idxs
 
-    def get(self, episode_idx, entry_idx=0):
-        d = self.data[episode_idx]
+    def len_episode(self, ep: int) -> int:
+        return self.length_episode(self.raw_data[ep])[0]  # (len_ep, idxs) -> len_ep
+
+    def _format_example(self, episode_idx, entry_idx=0):
+        d = self.raw_data[episode_idx]
         len_ep, idxs = self.length_episode(d)
         idx = idxs[entry_idx]
 
diff --git a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_test.yml b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_test.yml
index e8e7505d017..ed82e830b2b 100644
--- a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_test.yml
+++ b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_test.yml
@@ -46,7 +46,6 @@ acts:
       until it was replaced by the finer indigo from America.
 
       '
-    label_candidates: []
     text: Blue
     title: Blue
 - - checked_sentence: The Royal Blue was the Baltimore and Ohio Railroad (B&O)'s flagship
@@ -341,7 +340,6 @@ acts:
       purple-orange; or by intermixing complementary colors.
 
       '
-    label_candidates: []
     text: Blue is always nice. I like royal blue.
     title: Royal Blue (train)
 - - checked_sentence: Blue Skies is a 1946 American musical comedy film directed by
@@ -640,7 +638,6 @@ acts:
       Rams, located in Cleveland, Ohio.
 
       '
-    label_candidates: []
     text: Oh that sounds really nice. I bet there was a lot of scenery and blue skies.
     title: Blue Skies (1946 film)
 - - checked_sentence: Cinematography (also called "Direction of Photography") is the
@@ -803,7 +800,6 @@ acts:
       able to respond in time, which is a matter of both attitude and competence.
 
       '
-    label_candidates: []
     text: 'Cinematography
 
       Hi buddy, What you think about cinematography'
@@ -1030,7 +1026,6 @@ acts:
       (0.51–2.95 in), as well as a variety of film feeding systems.
 
       '
-    label_candidates: []
     text: Yes buddy,  Images captured with an electronic image-sensor, produces an
       electrical charge.The word "cinematography" is based on the Greek words  meaning
       movement, motion.
diff --git a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_train.yml b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_train.yml
index 4c006a5dbc4..44bc7e24b17 100644
--- a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_train.yml
+++ b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_train.yml
@@ -37,7 +37,6 @@ acts:
       impossible of realization tomorrow...
 
       '
-    label_candidates: []
     labels:
     - I think science fiction is an amazing genre for anything. Future science, technology,
       time travel, FTL travel, they're all such interesting concepts.
@@ -243,7 +242,6 @@ acts:
       Ancients and most exist within the city built by the Ancients known as Atlantis.
 
       '
-    label_candidates: []
     labels:
     - Awesome! I really love how sci-fi storytellers focus on political/social/philosophical
       issues that would still be around even in the future. Makes them relatable.
@@ -489,7 +487,6 @@ acts:
       composed by Tyler and Michael Giacchino, respectively.
 
       '
-    label_candidates: []
     labels:
     - It's not quite sci-fi, but my favorite version of time travel is in Harry Potter
       and the Prisoner of Azkaban. Breaks zero logical rules.
@@ -721,7 +718,6 @@ acts:
       leading characters: Harry Potter, Ron Weasley, and Hermione Granger.
 
       '
-    label_candidates: []
     labels:
     - If you really want a look at the potential negative consequences of scientific
       innovation, what you should check out is the TV show Fringe. Incredibly well
@@ -868,7 +864,6 @@ acts:
       are often also assumed or defined as requirements.
 
       '
-    label_candidates: []
     labels:
     - No I could not! I couldn't imagine living when internet access was rare and
       very few people had it!
diff --git a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_valid.yml b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_valid.yml
index d5f0a6a76d8..611c0f483c9 100644
--- a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_valid.yml
+++ b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_generator_valid.yml
@@ -152,7 +152,6 @@ acts:
       tailored more for the professional gardener.
 
       '
-    label_candidates: []
     text: 'Gardening
 
       I like Gardening, even when I''ve only been doing it for a short time.'
@@ -472,7 +471,6 @@ acts:
       Karma To Burn), Karma To Burn was signed to Roadrunner Records in 1996.
 
       '
-    label_candidates: []
     text: That sounds great.  I've always thought that I would love living in a farm,
       but I;ve always lived in the city.  What do you mostly plant?
     title: Gardening
@@ -742,7 +740,6 @@ acts:
       to indigenous ingredients and the local palate.
 
       '
-    label_candidates: []
     text: Great, I love the idea of growing my own vegetables and fruits! Do you have
       animals in the farm?
     title: Gardening
@@ -1056,7 +1053,6 @@ acts:
       after being imported into the region at the end of the 17th century (1685-1693).
 
       '
-    label_candidates: []
     text: Wow, it sounds amazing, the Micro-pigs are so cute! are they trainable to
       be well behaved?
     title: Gardening
@@ -1204,7 +1200,6 @@ acts:
       were edited by his father.
 
       '
-    label_candidates: []
     text: 'Bob Ross
 
       I would like to know more about bob ross'
diff --git a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_wizard_dialog_knowledge_train.yml b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_wizard_dialog_knowledge_train.yml
index b5b8662dfcf..7c85ab61ebf 100644
--- a/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_wizard_dialog_knowledge_train.yml
+++ b/parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_wizard_dialog_knowledge_train.yml
@@ -34,7 +34,6 @@ acts:
       realization tomorrow...
 
       '
-    label_candidates: []
     labels:
     - I think science fiction is an amazing genre for anything. Future science, technology,
       time travel, FTL travel, they're all such interesting concepts.
@@ -228,7 +227,6 @@ acts:
       most exist within the city built by the Ancients known as Atlantis.
 
       '
-    label_candidates: []
     labels:
     - Awesome! I really love how sci-fi storytellers focus on political/social/philosophical
       issues that would still be around even in the future. Makes them relatable.
@@ -461,7 +459,6 @@ acts:
       and Michael Giacchino, respectively.
 
       '
-    label_candidates: []
     labels:
     - It's not quite sci-fi, but my favorite version of time travel is in Harry Potter
       and the Prisoner of Azkaban. Breaks zero logical rules.
@@ -687,7 +684,6 @@ acts:
       Harry Potter, Ron Weasley, and Hermione Granger.
 
       '
-    label_candidates: []
     labels:
     - If you really want a look at the potential negative consequences of scientific
       innovation, what you should check out is the TV show Fringe. Incredibly well
@@ -826,7 +822,6 @@ acts:
       assumed or defined as requirements.
 
       '
-    label_candidates: []
     labels:
     - No I could not! I couldn't imagine living when internet access was rare and
       very few people had it!
diff --git a/tests/tasks/test_wizard_of_wikipedia.py b/tests/tasks/test_wizard_of_wikipedia.py
index 4db1d02233a..22767e9155b 100644
--- a/tests/tasks/test_wizard_of_wikipedia.py
+++ b/tests/tasks/test_wizard_of_wikipedia.py
@@ -89,30 +89,30 @@ def _run_display_test(self, kwargs):
 
         str_output = stdout.getvalue()
         self.assertTrue(
-            '[ loaded {} episodes with a total of {} examples ]'.format(
+            'loaded {} episodes with a total of {} examples'.format(
                 world.num_episodes(), world.num_examples()
             )
             in str_output,
-            'Wizard of Wikipedia failed with following args: {}'.format(opt),
+            'Wizard of Wikipedia failed with following args: {}'.format(opt)
+            + str_output,
         )
 
     def test_custom_eval(self):
         """
         Test whether custom evaluation works.
         """
-        with testing_utils.capture_output():
-            parser = setup_args()
-            opt = parser.parse_args(
-                [
-                    '--task',
-                    'wizard_of_wikipedia',
-                    '--datatype',
-                    'valid',
-                    '--label-type',
-                    'chosen_sent',
-                ]
-            )
-            teacher = create_task_agent_from_taskname(opt)[0]
+        parser = setup_args()
+        opt = parser.parse_args(
+            [
+                '--task',
+                'wizard_of_wikipedia',
+                '--datatype',
+                'valid',
+                '--label-type',
+                'chosen_sent',
+            ]
+        )
+        teacher = create_task_agent_from_taskname(opt)[0]
 
         title = 'Gardening'
         cands = list('four')