From dd276144196bff153311bdc917302a0b5bbb57d5 Mon Sep 17 00:00:00 2001 From: Bruno Lenzi Date: Tue, 22 Oct 2019 13:19:25 +0200 Subject: [PATCH 1/4] feat: Add argument img_folder to OpenFire Following the discussion in #26, add argument (str or Path) to OpenFire. If not given, the default is used. The rest of the folder structure is kept for the moment. test_datasets.py now tests its type and also if the download succeeded approximately: up to 10 samples can be missing. --- pyronear/datasets/openfire.py | 14 ++++++++------ test/test_datasets.py | 14 ++++++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pyronear/datasets/openfire.py b/pyronear/datasets/openfire.py index 20956811..086303b1 100644 --- a/pyronear/datasets/openfire.py +++ b/pyronear/datasets/openfire.py @@ -32,6 +32,7 @@ class OpenFire(VisionDataset): threads (int, optional): If download is set to True, use this amount of threads for downloading the dataset. num_samples (int, optional): Number of samples to download (all by default) + img_folder (str or Path, optional): Location of image folder. 
Default: <root>/OpenFire/images """ url = 'https://gist.githubusercontent.com/frgfm/f53b4f53a1b2dc3bb4f18c006a32ec0d/raw/c0351134e333710c6ce0c631af5198e109ed7a92/openfire_binary.json' @@ -39,9 +40,11 @@ class OpenFire(VisionDataset): test_file = 'test.pt' classes = [False, True] - def __init__(self, root, train=True, download=False, threads=16, num_samples=None, **kwargs): + def __init__(self, root, train=True, download=False, threads=16, num_samples=None, img_folder=None, **kwargs): super(OpenFire, self).__init__(root, **kwargs) self.train = train # training set or test set + self.img_folder = self._root.joinpath(self.__class__.__name__, 'images') \ + if img_folder is None else Path(img_folder) if download: self.download(threads, num_samples) @@ -118,17 +121,16 @@ def download(self, threads=None, num_samples=None): # Download actual images training_set, test_set = [], [] - img_folder = self._root.joinpath(self._raw, 'images') - img_folder.mkdir(parents=True, exist_ok=True) + self.img_folder.mkdir(parents=True, exist_ok=True) unavailable_idxs = 0 # Prepare URL and filenames for multi-processing entries = [(a['url'], a['name']) for idx, a in enumerate(annotations)] # Use multiple threads to speed up download - download_urls(entries, img_folder, threads=threads) + download_urls(entries, self.img_folder, threads=threads) # Verify downloads for idx, annotation in enumerate(annotations): - img_path = self._raw.joinpath('images', entries[idx][1]) - if self._root.joinpath(img_path).is_file(): + img_path = self.img_folder.joinpath(entries[idx][1]) + if img_path.is_file(): # Encode target target = self.class_to_idx[annotation['target']] # Aggregate img path and annotations diff --git a/test/test_datasets.py b/test/test_datasets.py index 7189aacf..6b1fbf71 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -44,12 +44,17 @@ def test_downloadurls(self): def test_openfire(self): num_samples = 200 + img_folder = tempfile.TemporaryDirectory().name # N.B.: different from 
root with Path(tempfile.TemporaryDirectory().name) as root: + # Test img_folder argument: wrong type + self.assertRaises(TypeError, datasets.OpenFire, root, download=True, img_folder=1) + # Working case - train_set = datasets.OpenFire(root=root, train=True, download=True, num_samples=num_samples) - test_set = datasets.OpenFire(root=root, train=False, download=True, num_samples=num_samples) + # Test img_folder as Path and str + train_set = datasets.OpenFire(root=root, train=True, download=True, num_samples=num_samples, img_folder=Path(img_folder)) + test_set = datasets.OpenFire(root=root, train=False, download=True, num_samples=num_samples, img_folder=img_folder) # Check inherited properties self.assertIsInstance(train_set, VisionDataset) @@ -63,8 +68,9 @@ def test_openfire(self): datasets.utils.download_url(train_set.url, root, filename='extract.json', verbose=False) with open(root.joinpath('extract.json'), 'rb') as f: extract = json.load(f)[:num_samples] - # Uncomment when download issues are resolved - # self.assertEqual(len(train_set) + len(test_set), len(extract)) + # Test if not more than 10 downloads failed. 
+ # Change to assertEqual when download issues are resolved + self.assertAlmostEqual(len(train_set) + len(test_set), len(extract), delta=10) # Check integrity of samples img, target = train_set[0] From bce369a6de0cef6b18b6658e11c0b19c1351df7f Mon Sep 17 00:00:00 2001 From: Bruno Lenzi Date: Tue, 22 Oct 2019 17:01:21 +0200 Subject: [PATCH 2/4] style / test: style changes to OpenFire and adding tests on img_folder Following comments on #32: - Avoiding long lines, also splitting if / else in OpenFire c-tor - Changing style and adding test on default img_folder value in test_datasets.py --- pyronear/datasets/openfire.py | 9 ++++++--- test/test_datasets.py | 14 +++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/pyronear/datasets/openfire.py b/pyronear/datasets/openfire.py index 086303b1..6c1bd23e 100644 --- a/pyronear/datasets/openfire.py +++ b/pyronear/datasets/openfire.py @@ -40,11 +40,14 @@ class OpenFire(VisionDataset): test_file = 'test.pt' classes = [False, True] - def __init__(self, root, train=True, download=False, threads=16, num_samples=None, img_folder=None, **kwargs): + def __init__(self, root, train=True, download=False, threads=16, num_samples=None, + img_folder=None, **kwargs): super(OpenFire, self).__init__(root, **kwargs) self.train = train # training set or test set - self.img_folder = self._root.joinpath(self.__class__.__name__, 'images') \ - if img_folder is None else Path(img_folder) + if img_folder is None: + self.img_folder = self._root.joinpath(self.__class__.__name__, 'images') + else: + self.img_folder = Path(img_folder) if download: self.download(threads, num_samples) diff --git a/test/test_datasets.py b/test/test_datasets.py index 6b1fbf71..e6f4f775 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -44,12 +44,16 @@ def test_downloadurls(self): def test_openfire(self): num_samples = 200 - img_folder = tempfile.TemporaryDirectory().name # N.B.: different from root - with 
Path(tempfile.TemporaryDirectory().name) as root: - - # Test img_folder argument: wrong type + # Test img_folder argument: wrong type and default (None) + with tempfile.TemporaryDirectory() as root: self.assertRaises(TypeError, datasets.OpenFire, root, download=True, img_folder=1) + ds = datasets.OpenFire(root=root, download=True, num_samples=num_samples, + img_folder=None) + self.assertIsInstance(ds.img_folder, Path) + + with tempfile.TemporaryDirectory() as root, \ + tempfile.TemporaryDirectory() as img_folder: # Working case # Test img_folder as Path and str @@ -66,7 +70,7 @@ def test_openfire(self): # Check against number of samples in extract (limit to num_samples) datasets.utils.download_url(train_set.url, root, filename='extract.json', verbose=False) - with open(root.joinpath('extract.json'), 'rb') as f: + with open(Path(root).joinpath('extract.json'), 'rb') as f: extract = json.load(f)[:num_samples] # Test if not more than 10 downloads failed. # Change to assertEqual when download issues are resolved From e103f020d0e51c0e1b58cd96021faac2a8c6c198 Mon Sep 17 00:00:00 2001 From: Bruno Lenzi Date: Wed, 23 Oct 2019 11:49:19 +0200 Subject: [PATCH 3/4] feat: add img-folder argument to training scripts Adding argument --img-folder to fastai/train.py and torch/train.py --- references/classification/fastai/train.py | 8 ++++++-- references/classification/torch/train.py | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/references/classification/fastai/train.py b/references/classification/fastai/train.py index 907e602e..e5c22f0c 100644 --- a/references/classification/fastai/train.py +++ b/references/classification/fastai/train.py @@ -29,11 +29,13 @@ def main(args): # Aggregate path and labels into list for fastai ImageDataBunch fnames, labels, is_valid = [], [], [] - for sample in OpenFire(root=args.data_path, train=True, download=True).data: + for sample in OpenFire(root=args.data_path, train=True, download=True, + 
img_folder=args.img_folder).data: fnames.append(sample['path']) labels.append(sample['target']) is_valid.append(False) - for sample in OpenFire(root=args.data_path, train=False, download=True).data: + for sample in OpenFire(root=args.data_path, train=False, download=True, + img_folder=args.img_folder).data: fnames.append(sample['path']) labels.append(sample['target']) is_valid.append(True) @@ -61,6 +63,8 @@ def main(args): import argparse parser = argparse.ArgumentParser(description='PyroNear Classification Training with Fastai') parser.add_argument('--data-path', default='./data', help='dataset') + parser.add_argument('--img-folder', default=None, + help='Folder containing images. Default: /OpenFire/images') parser.add_argument('--model', default='resnet18', type=str, help='model') parser.add_argument('--device', default='cuda', help='device') parser.add_argument('-b', '--batch-size', default=32, type=int) diff --git a/references/classification/torch/train.py b/references/classification/torch/train.py index 8da6c995..d1ec898e 100644 --- a/references/classification/torch/train.py +++ b/references/classification/torch/train.py @@ -163,9 +163,9 @@ def main(args): # Train & test sets train_set = OpenFire(root=args.data_path, train=True, download=True, - transform=train_transforms) + transform=train_transforms, img_folder=args.img_folder) val_set = OpenFire(root=args.data_path, train=False, download=True, - transform=test_transforms) + transform=test_transforms, img_folder=args.img_folder) num_classes = len(train_set.classes) # Samplers train_sampler = torch.utils.data.RandomSampler(train_set) @@ -230,6 +230,8 @@ def main(args): import argparse parser = argparse.ArgumentParser(description='PyroNear Classification Training') parser.add_argument('--data-path', default='./data', help='dataset') + parser.add_argument('--img-folder', default=None, + help='Folder containing images. 
Default: /OpenFire/images') parser.add_argument('--model', default='resnet18', help='model') parser.add_argument('--device', default=None, help='device') parser.add_argument('-b', '--batch-size', default=32, type=int) From 79a5e23e0818314c549646c6bad72b4b53f82c97 Mon Sep 17 00:00:00 2001 From: Bruno Lenzi Date: Fri, 25 Sep 2020 18:33:19 +0200 Subject: [PATCH 4/4] test: allow up to 15 failed downloads when testing datasets In test_datasets.py/DatasetsTester.test_openfire: allow up to 15 failed downloads instead of 10 --- test/test_datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 560ab1e2..31c93f76 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -83,9 +83,9 @@ def test_openfire(self): datasets.utils.download_url(train_set.url, root, filename='extract.json', verbose=False) with open(Path(root).joinpath('extract.json'), 'rb') as f: extract = json.load(f)[:num_samples] - # Test if not more than 10 downloads failed. + # Test if not more than 15 downloads failed. # Change to assertEqual when download issues are resolved - self.assertAlmostEqual(len(train_set) + len(test_set), len(extract), delta=10) + self.assertAlmostEqual(len(train_set) + len(test_set), len(extract), delta=15) # Check integrity of samples img, target = train_set[0]