diff --git a/parlai/scripts/data_stats.py b/parlai/scripts/data_stats.py index 932f1793791..a92833d95db 100644 --- a/parlai/scripts/data_stats.py +++ b/parlai/scripts/data_stats.py @@ -108,12 +108,14 @@ def keep_token(t): while not world.epoch_done() and world.total_exs < max_cnt: world.parley() act = world.get_acts()[opt.get('agent')] + if act.is_padding(): + continue for itype in {'input', 'labels'}: if itype == 'input': if opt.get('new_line_new_utt'): txts = act.get('text').split('\n') else: - txts = [act.get('text')] + txts = [act.get('text', '')] else: txts = act.get('labels', act.get('eval_labels', ['']))