From 9f80fdef9a154b189cc07648683fb2b5a6c01837 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 4 May 2021 23:37:31 +0200 Subject: [PATCH] download() ThreadPool update (#3027) * download() ThreadPool update * update train image count * cid + 1 (cherry picked from commit 8cab907f60b0debdde5107fae985d8b444db72af) --- data/objects365.yaml | 11 ++++++++--- utils/general.py | 5 ++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/data/objects365.yaml b/data/objects365.yaml index 5d19ab5cabbb..eb99995903cf 100644 --- a/data/objects365.yaml +++ b/data/objects365.yaml @@ -6,7 +6,7 @@ # /yolov5 # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: ../datasets/objects365/images/train # 1.7 Million images +train: ../datasets/objects365/images/train # 1742289 images val: ../datasets/objects365/images/val # 5570 images # number of classes @@ -72,17 +72,22 @@ download: | # Download url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/" - download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir) # annotations json + download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', curl=True, delete=False, threads=8) + # Move + train = dir / 'images' / 'train' + for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'): + f.rename(train / f.name) # move to /images/train + # Labels coco = COCO(dir / 'zhiyuan_objv2_train.json') names = [x["name"] for x in coco.loadCats(coco.getCatIds())] for cid, cat in enumerate(names): catIds = coco.getCatIds(catNms=[cat]) imgIds = coco.getImgIds(catIds=catIds) - for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid}/{len(names)} {cat}'): + for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'): width, height = im["width"], im["height"] path = Path(im["file_name"]) # image filename try: diff --git a/utils/general.py b/utils/general.py index 58c29cc9726b..eff1b87a6223 100644 --- a/utils/general.py +++ b/utils/general.py @@ -220,7 +220,10 @@ def download_one(url, dir): dir = Path(dir) dir.mkdir(parents=True, exist_ok=True) # make directory if threads > 1: - ThreadPool(threads).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded + pool = ThreadPool(threads) + pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded + pool.close() + pool.join() else: for u in tuple(url) if isinstance(url, str) else url: download_one(u, dir)