From dd276144196bff153311bdc917302a0b5bbb57d5 Mon Sep 17 00:00:00 2001
From: Bruno Lenzi <Bruno.Lenzi@cern.ch>
Date: Tue, 22 Oct 2019 13:19:25 +0200
Subject: [PATCH 1/4] feat: Add argument img_folder to OpenFire

Following the discussion in #26, add the argument `img_folder` (str or Path) to OpenFire. If it is not given, the default `<root>/OpenFire/images` is used. The rest of the folder structure is kept for the moment.

test_datasets.py now tests the type of the `img_folder` argument and also checks that the download approximately succeeded: up to 10 samples may be missing.
---
 pyronear/datasets/openfire.py | 14 ++++++++------
 test/test_datasets.py         | 14 ++++++++++----
 2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/pyronear/datasets/openfire.py b/pyronear/datasets/openfire.py
index 20956811..086303b1 100644
--- a/pyronear/datasets/openfire.py
+++ b/pyronear/datasets/openfire.py
@@ -32,6 +32,7 @@ class OpenFire(VisionDataset):
         threads (int, optional): If download is set to True, use this amount of threads
             for downloading the dataset.
         num_samples (int, optional): Number of samples to download (all by default)
+        img_folder (str or Path, optional): Location of image folder. Default: <root>/OpenFire/images
     """
 
     url = 'https://gist.githubusercontent.com/frgfm/f53b4f53a1b2dc3bb4f18c006a32ec0d/raw/c0351134e333710c6ce0c631af5198e109ed7a92/openfire_binary.json'
@@ -39,9 +40,11 @@ class OpenFire(VisionDataset):
     test_file = 'test.pt'
     classes = [False, True]
 
-    def __init__(self, root, train=True, download=False, threads=16, num_samples=None, **kwargs):
+    def __init__(self, root, train=True, download=False, threads=16, num_samples=None, img_folder=None, **kwargs):
         super(OpenFire, self).__init__(root, **kwargs)
         self.train = train  # training set or test set
+        self.img_folder = self._root.joinpath(self.__class__.__name__, 'images') \
+          if img_folder is None else Path(img_folder)
 
         if download:
             self.download(threads, num_samples)
@@ -118,17 +121,16 @@ def download(self, threads=None, num_samples=None):
 
         # Download actual images
         training_set, test_set = [], []
-        img_folder = self._root.joinpath(self._raw, 'images')
-        img_folder.mkdir(parents=True, exist_ok=True)
+        self.img_folder.mkdir(parents=True, exist_ok=True)
         unavailable_idxs = 0
         # Prepare URL and filenames for multi-processing
         entries = [(a['url'], a['name']) for idx, a in enumerate(annotations)]
         # Use multiple threads to speed up download
-        download_urls(entries, img_folder, threads=threads)
+        download_urls(entries, self.img_folder, threads=threads)
         # Verify downloads
         for idx, annotation in enumerate(annotations):
-            img_path = self._raw.joinpath('images', entries[idx][1])
-            if self._root.joinpath(img_path).is_file():
+            img_path = self.img_folder.joinpath(entries[idx][1])
+            if img_path.is_file():
                 # Encode target
                 target = self.class_to_idx[annotation['target']]
                 # Aggregate img path and annotations
diff --git a/test/test_datasets.py b/test/test_datasets.py
index 7189aacf..6b1fbf71 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -44,12 +44,17 @@ def test_downloadurls(self):
 
     def test_openfire(self):
         num_samples = 200
+        img_folder = tempfile.TemporaryDirectory().name # N.B.: different from root
 
         with Path(tempfile.TemporaryDirectory().name) as root:
 
+            # Test img_folder argument: wrong type
+            self.assertRaises(TypeError, datasets.OpenFire, root, download=True, img_folder=1)
+
             # Working case
-            train_set = datasets.OpenFire(root=root, train=True, download=True, num_samples=num_samples)
-            test_set = datasets.OpenFire(root=root, train=False, download=True, num_samples=num_samples)
+            # Test img_folder as Path and str
+            train_set = datasets.OpenFire(root=root, train=True, download=True, num_samples=num_samples, img_folder=Path(img_folder))
+            test_set = datasets.OpenFire(root=root, train=False, download=True, num_samples=num_samples, img_folder=img_folder)
             # Check inherited properties
             self.assertIsInstance(train_set, VisionDataset)
 
@@ -63,8 +68,9 @@ def test_openfire(self):
             datasets.utils.download_url(train_set.url, root, filename='extract.json', verbose=False)
             with open(root.joinpath('extract.json'), 'rb') as f:
                 extract = json.load(f)[:num_samples]
-            # Uncomment when download issues are resolved
-            # self.assertEqual(len(train_set) + len(test_set), len(extract))
+            # Test if not more than 10 downloads failed.
+            # Change to assertEqual when download issues are resolved
+            self.assertAlmostEqual(len(train_set) + len(test_set), len(extract), delta=10)
 
             # Check integrity of samples
             img, target = train_set[0]

From bce369a6de0cef6b18b6658e11c0b19c1351df7f Mon Sep 17 00:00:00 2001
From: Bruno Lenzi <Bruno.Lenzi@cern.ch>
Date: Tue, 22 Oct 2019 17:01:21 +0200
Subject: [PATCH 2/4] style / test: style changes to OpenFire and adding tests
 on img_folder

Following comments on #32:
- Avoiding long lines, and splitting the if / else expression in the OpenFire constructor into separate statements
- Changing style and adding test on default img_folder value in test_datasets.py
---
 pyronear/datasets/openfire.py |  9 ++++++---
 test/test_datasets.py         | 14 +++++++++-----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pyronear/datasets/openfire.py b/pyronear/datasets/openfire.py
index 086303b1..6c1bd23e 100644
--- a/pyronear/datasets/openfire.py
+++ b/pyronear/datasets/openfire.py
@@ -40,11 +40,14 @@ class OpenFire(VisionDataset):
     test_file = 'test.pt'
     classes = [False, True]
 
-    def __init__(self, root, train=True, download=False, threads=16, num_samples=None, img_folder=None, **kwargs):
+    def __init__(self, root, train=True, download=False, threads=16, num_samples=None,
+                 img_folder=None, **kwargs):
         super(OpenFire, self).__init__(root, **kwargs)
         self.train = train  # training set or test set
-        self.img_folder = self._root.joinpath(self.__class__.__name__, 'images') \
-          if img_folder is None else Path(img_folder)
+        if img_folder is None:
+            self.img_folder = self._root.joinpath(self.__class__.__name__, 'images')
+        else:
+            self.img_folder = Path(img_folder)
 
         if download:
             self.download(threads, num_samples)
diff --git a/test/test_datasets.py b/test/test_datasets.py
index 6b1fbf71..e6f4f775 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -44,12 +44,16 @@ def test_downloadurls(self):
 
     def test_openfire(self):
         num_samples = 200
-        img_folder = tempfile.TemporaryDirectory().name # N.B.: different from root
 
-        with Path(tempfile.TemporaryDirectory().name) as root:
-
-            # Test img_folder argument: wrong type
+        # Test img_folder argument: wrong type and default (None)
+        with tempfile.TemporaryDirectory() as root:
             self.assertRaises(TypeError, datasets.OpenFire, root, download=True, img_folder=1)
+            ds = datasets.OpenFire(root=root, download=True, num_samples=num_samples,
+                                   img_folder=None)
+            self.assertIsInstance(ds.img_folder, Path)
+
+        with tempfile.TemporaryDirectory() as root, \
+             tempfile.TemporaryDirectory() as img_folder:
 
             # Working case
             # Test img_folder as Path and str
@@ -66,7 +70,7 @@ def test_openfire(self):
 
             # Check against number of samples in extract (limit to num_samples)
             datasets.utils.download_url(train_set.url, root, filename='extract.json', verbose=False)
-            with open(root.joinpath('extract.json'), 'rb') as f:
+            with open(Path(root).joinpath('extract.json'), 'rb') as f:
                 extract = json.load(f)[:num_samples]
             # Test if not more than 10 downloads failed.
             # Change to assertEqual when download issues are resolved

From e103f020d0e51c0e1b58cd96021faac2a8c6c198 Mon Sep 17 00:00:00 2001
From: Bruno Lenzi <Bruno.Lenzi@cern.ch>
Date: Wed, 23 Oct 2019 11:49:19 +0200
Subject: [PATCH 3/4] feat: add img-folder argument to training scripts

Adding argument --img-folder to fastai/train.py and torch/train.py
---
 references/classification/fastai/train.py | 8 ++++++--
 references/classification/torch/train.py  | 6 ++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/references/classification/fastai/train.py b/references/classification/fastai/train.py
index 907e602e..e5c22f0c 100644
--- a/references/classification/fastai/train.py
+++ b/references/classification/fastai/train.py
@@ -29,11 +29,13 @@ def main(args):
 
     # Aggregate path and labels into list for fastai ImageDataBunch
     fnames, labels, is_valid = [], [], []
-    for sample in OpenFire(root=args.data_path, train=True, download=True).data:
+    for sample in OpenFire(root=args.data_path, train=True, download=True,
+                           img_folder=args.img_folder).data:
         fnames.append(sample['path'])
         labels.append(sample['target'])
         is_valid.append(False)
-    for sample in OpenFire(root=args.data_path, train=False, download=True).data:
+    for sample in OpenFire(root=args.data_path, train=False, download=True,
+                           img_folder=args.img_folder).data:
         fnames.append(sample['path'])
         labels.append(sample['target'])
         is_valid.append(True)
@@ -61,6 +63,8 @@ def main(args):
     import argparse
     parser = argparse.ArgumentParser(description='PyroNear Classification Training with Fastai')
     parser.add_argument('--data-path', default='./data', help='dataset')
+    parser.add_argument('--img-folder', default=None,
+        help='Folder containing images. Default: <data_path>/OpenFire/images')
     parser.add_argument('--model', default='resnet18', type=str, help='model')
     parser.add_argument('--device', default='cuda', help='device')
     parser.add_argument('-b', '--batch-size', default=32, type=int)
diff --git a/references/classification/torch/train.py b/references/classification/torch/train.py
index 8da6c995..d1ec898e 100644
--- a/references/classification/torch/train.py
+++ b/references/classification/torch/train.py
@@ -163,9 +163,9 @@ def main(args):
 
     # Train & test sets
     train_set = OpenFire(root=args.data_path, train=True, download=True,
-                         transform=train_transforms)
+                         transform=train_transforms, img_folder=args.img_folder)
     val_set = OpenFire(root=args.data_path, train=False, download=True,
-                       transform=test_transforms)
+                       transform=test_transforms, img_folder=args.img_folder)
     num_classes = len(train_set.classes)
     # Samplers
     train_sampler = torch.utils.data.RandomSampler(train_set)
@@ -230,6 +230,8 @@ def main(args):
     import argparse
     parser = argparse.ArgumentParser(description='PyroNear Classification Training')
     parser.add_argument('--data-path', default='./data', help='dataset')
+    parser.add_argument('--img-folder', default=None,
+        help='Folder containing images. Default: <data_path>/OpenFire/images')
     parser.add_argument('--model', default='resnet18', help='model')
     parser.add_argument('--device', default=None, help='device')
     parser.add_argument('-b', '--batch-size', default=32, type=int)

From 79a5e23e0818314c549646c6bad72b4b53f82c97 Mon Sep 17 00:00:00 2001
From: Bruno Lenzi <Bruno.Lenzi@cern.ch>
Date: Fri, 25 Sep 2020 18:33:19 +0200
Subject: [PATCH 4/4] test: allow up to 15 failed downloads when testing
 datasets

In test_datasets.py/DatasetsTester.test_openfire: allow up to 15 failed downloads instead of 10
---
 test/test_datasets.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 560ab1e2..31c93f76 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -83,9 +83,9 @@ def test_openfire(self):
             datasets.utils.download_url(train_set.url, root, filename='extract.json', verbose=False)
             with open(Path(root).joinpath('extract.json'), 'rb') as f:
                 extract = json.load(f)[:num_samples]
-            # Test if not more than 10 downloads failed.
+            # Test if not more than 15 downloads failed.
             # Change to assertEqual when download issues are resolved
-            self.assertAlmostEqual(len(train_set) + len(test_set), len(extract), delta=10)
+            self.assertAlmostEqual(len(train_set) + len(test_set), len(extract), delta=15)
 
             # Check integrity of samples
             img, target = train_set[0]