diff --git a/mim/commands/download.py b/mim/commands/download.py index a22abdb..b1cc566 100644 --- a/mim/commands/download.py +++ b/mim/commands/download.py @@ -210,38 +210,51 @@ def _download_dataset(package: str, dataset: str, dest_root: str) -> None: f'already updated it and still get this error, please report an ' f'issue to {package}') with open(dataset_index_path) as f: - datasets_meta = yaml.load(f, Loader=yaml.SafeLoader) + dataset_metas = yaml.load(f, Loader=yaml.SafeLoader) - if dataset not in datasets_meta: + if dataset not in dataset_metas: raise KeyError(f'Cannot find {dataset} in {dataset_index_path}. ' 'here are the available datasets: ' - '{}'.format('\n'.join(datasets_meta.keys()))) - dataset_meta = datasets_meta[dataset] - + '{}'.format('\n'.join(dataset_metas.keys()))) + dataset_meta = dataset_metas[dataset] # OpenMMLab repo will define the `dataset-index.yml` like this: + # openxlab: true # voc2007: # dataset: PASCAL_VOC2007 # download_root: data # data_root: data # script: tools/dataset_converters/scripts/preprocess_voc2007.sh - # In this case, the top level key "voc2007" means the "Dataset Name" passed + # In this case: + # `openxlab` means download the dataset with `openxlab` cli, otherwise + # use the `odl` cli. Although `odl` cli will not be maintained in the + # future, we still keep it here for compatibility. + + # The top level key "voc2007" means the "Dataset Name" passed # to `mim download --dataset {Dataset Name}` + # The nested field "dataset" means the argument passed to `odl get` # If the value of "dataset" is the same as the "Dataset Name", downstream # repos can skip defining "dataset" and "Dataset Name" will be passed # to `odl get` - src_name = dataset_meta.get('dataset', dataset) + use_openxlab = dataset_metas.get('openxlab', False) + src_name = dataset_meta.get('dataset', dataset) # `odl get` will download raw dataset to `download_root`, and the script # will process the raws data and put the prepared data to the `data_root` download_root = dataset_meta['download_root'] os.makedirs(download_root, exist_ok=True) color_echo(f'Start downloading {dataset} to {download_root}...', 'blue') - subprocess.check_call(['odl', 'get', src_name, '-d', download_root], - stdin=sys.stdin, - stdout=sys.stdout) + if use_openxlab: + subprocess.check_call( + ['openxlab', 'dataset', 'get', src_name, '-d', download_root], + stdin=sys.stdin, + stdout=sys.stdout) + else: + subprocess.check_call(['odl', 'get', src_name, '-d', download_root], + stdin=sys.stdin, + stdout=sys.stdout) if not osp.exists(download_root): return