From 94ee556ea438936cc3686be4922478c5aace5f3d Mon Sep 17 00:00:00 2001 From: Moya Chen Date: Thu, 4 Mar 2021 05:35:38 -0800 Subject: [PATCH] Make CCPE work Cause it's been a while and Google apparently moved the directory and mildly changed the format of the JSON blob. --- parlai/tasks/ccpe/agents.py | 6 +----- parlai/tasks/ccpe/build.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/parlai/tasks/ccpe/agents.py b/parlai/tasks/ccpe/agents.py index 857475c40e6..b7a1b51c68e 100644 --- a/parlai/tasks/ccpe/agents.py +++ b/parlai/tasks/ccpe/agents.py @@ -38,15 +38,11 @@ def _setup_data(self): fpath = os.path.join(self.opt['datapath'], 'CCPE', 'ccpe.json') with PathManager.open(fpath, 'r') as infile: - data = infile.read() - new_data = data.replace('}\n{', '},{') - json_data = json.loads(f'[{new_data}]') + json_data = json.load(infile) flattenedData = [] - for ep in range(len(json_data)): currEp = [] - entry = {} currSegments = [] for i, utterance in enumerate(json_data[ep]['utterances']): diff --git a/parlai/tasks/ccpe/build.py b/parlai/tasks/ccpe/build.py index dcc0e35c4a1..c743037dbfd 100644 --- a/parlai/tasks/ccpe/build.py +++ b/parlai/tasks/ccpe/build.py @@ -10,7 +10,7 @@ RESOURCES = [ DownloadableFile( - 'https://storage.googleapis.com/dialog-data-corpus/CCPE-M-2019/data.json', + 'https://raw.githubusercontent.com/google-research-datasets/ccpe/main/data.json', 'ccpe.json', '4ff051ea7ea60cf0f480c911c7e2cfed56434e2e2c9ea8965ac5e26365773f0a', zipped=False, @@ -20,7 +20,7 @@ def build(opt): dpath = os.path.join(opt['datapath'], 'CCPE') - version = '1.0' + version = '1.1' if not build_data.built(dpath, version_string=version): print('[building data: ' + dpath + ']')