From e116d8d74e3b5c9350afd295461f6f0f48a53cf8 Mon Sep 17 00:00:00 2001
From: yasakova-anastasia <anastasia.yasakova@intel.com>
Date: Mon, 18 Jan 2021 17:31:21 +0300
Subject: [PATCH 1/2] Add a folder for unlabeled items in VggFace2 dataset
 format

---
 datumaro/plugins/vgg_face2_format.py | 40 ++++++++++++++++++----------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/datumaro/plugins/vgg_face2_format.py b/datumaro/plugins/vgg_face2_format.py
index 1e693c5456..bd7a818be2 100644
--- a/datumaro/plugins/vgg_face2_format.py
+++ b/datumaro/plugins/vgg_face2_format.py
@@ -17,6 +17,7 @@ class VggFace2Path:
     BBOXES_FILE = 'loose_bb_'
     LANDMARKS_FILE = 'loose_landmark_'
     LABELS_FILE = 'labels.txt'
+    IMAGES_DIR_NO_LABEL = 'no_label'
 
 class VggFace2Extractor(SourceExtractor):
     def __init__(self, path):
@@ -50,8 +51,9 @@ def _load_categories(self):
             subset_path = osp.join(self._dataset_dir, self._subset)
             if osp.isdir(subset_path):
                 for images_dir in sorted(os.listdir(subset_path)):
-                    if osp.isdir(osp.join(subset_path, images_dir)):
-                       label_cat.add(images_dir)
+                    if osp.isdir(osp.join(subset_path, images_dir)) and \
+                            images_dir != VggFace2Path.IMAGES_DIR_NO_LABEL:
+                        label_cat.add(images_dir)
         return { AnnotationType.label: label_cat }
 
     def _load_items(self, path):
@@ -64,9 +66,10 @@ def _load_items(self, path):
             label = None
             if '/' in item_id:
                 label_name = item_id.split('/')[0]
-                label = self._categories[AnnotationType.label].find(label_name)[0]
-                if label is not None:
-                    item_id = item_id[len(label_name) + 1:]
+                if label_name != VggFace2Path.IMAGES_DIR_NO_LABEL:
+                    label = \
+                        self._categories[AnnotationType.label].find(label_name)[0]
+                item_id = item_id[len(label_name) + 1:]
             if item_id not in items:
                 image_path = osp.join(self._dataset_dir, self._subset,
                     row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
@@ -90,9 +93,10 @@ def _load_items(self, path):
                 label = None
                 if '/' in item_id:
                     label_name = item_id.split('/')[0]
-                    label = self._categories[AnnotationType.label].find(label_name)[0]
-                    if label is not None:
-                        item_id = item_id[len(label_name) + 1:]
+                    if label_name != VggFace2Path.IMAGES_DIR_NO_LABEL:
+                        label = \
+                            self._categories[AnnotationType.label].find(label_name)[0]
+                    item_id = item_id[len(label_name) + 1:]
                 if item_id not in items:
                     image_path = osp.join(self._dataset_dir, self._subset,
                         row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
@@ -146,16 +150,19 @@ def apply(self):
                                 + item.id + VggFace2Path.IMAGE_EXT))
                     else:
                         self._save_image(item, osp.join(subset_dir,
+                            VggFace2Path.IMAGES_DIR_NO_LABEL,
                             item.id + VggFace2Path.IMAGE_EXT))
 
                 landmarks = [a for a in item.annotations
                     if a.type == AnnotationType.points]
                 for landmark in landmarks:
-                    name_id = item.id
-                    if landmark.label is not None \
-                        and label_categories[landmark.label].name:
+                    if landmark.label is not None and \
+                            label_categories[landmark.label].name:
                         name_id = label_categories[landmark.label].name \
                             + '/' + item.id
+                    else:
+                        name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \
+                            + '/' + item.id
                     points = landmark.points
                     landmarks_table.append({'NAME_ID': name_id,
                         'P1X': points[0], 'P1Y': points[1],
@@ -167,26 +174,31 @@ def apply(self):
                 bboxes = [a for a in item.annotations
                     if a.type == AnnotationType.bbox]
                 for bbox in bboxes:
-                    name_id = item.id
                     if bbox.label is not None and \
                             label_categories[bbox.label].name:
                         name_id = label_categories[bbox.label].name \
                             + '/' + item.id
+                    else:
+                        name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \
+                            + '/' + item.id
                     bboxes_table.append({'NAME_ID': name_id, 'X': bbox.x,
                         'Y': bbox.y, 'W': bbox.w, 'H': bbox.h})
 
                 labels = [a for a in item.annotations
                     if a.type == AnnotationType.label]
                 for label in labels:
-                    name_id = item.id
                     if label.label is not None and \
                             label_categories[label.label].name:
                         name_id = label_categories[label.label].name \
                             + '/' + item.id
+                    else:
+                        name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \
+                            + '/' + item.id
                     landmarks_table.append({'NAME_ID': name_id})
 
                 if not landmarks and not bboxes and not labels:
-                    landmarks_table.append({'NAME_ID': item.id})
+                    landmarks_table.append({'NAME_ID':
+                        VggFace2Path.IMAGES_DIR_NO_LABEL + '/' + item.id})
 
             landmarks_path = osp.join(save_dir, VggFace2Path.ANNOTATION_DIR,
                 VggFace2Path.LANDMARKS_FILE + subset_name + '.csv')

From bb9048c241fa5755cf2cbe13acbf39c2ed4cc171 Mon Sep 17 00:00:00 2001
From: yasakova-anastasia <anastasia.yasakova@intel.com>
Date: Tue, 19 Jan 2021 09:00:27 +0300
Subject: [PATCH 2/2] Add test for saving a VggFace2 dataset with no labels

---
 tests/test_vgg_face2_format.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/test_vgg_face2_format.py b/tests/test_vgg_face2_format.py
index aa25196062..4e260d6bcb 100644
--- a/tests/test_vgg_face2_format.py
+++ b/tests/test_vgg_face2_format.py
@@ -88,6 +88,28 @@ def test_can_save_dataset_with_no_save_images(self):
 
             compare_datasets(self, source_dataset, parsed_dataset)
 
+    def test_can_save_dataset_with_no_labels(self):  # export/import round-trip without labels
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id='1', image=np.ones((8, 8, 3)),
+                annotations=[
+                    Bbox(0, 2, 4, 2, group=1),  # no label is passed
+                    Points([4.23, 4.32, 5.34, 4.45, 3.54,
+                        3.56, 4.52, 3.51, 4.78, 3.34], group=1),  # no label is passed
+                ]
+            ),
+            DatasetItem(id='2', image=np.ones((8, 8, 3)),  # item with a bbox only
+                annotations=[
+                    Bbox(2, 2, 4, 2, group=1),
+                ]
+            ),
+        ], categories=[])  # no label categories are defined at all
+
+        with TestDir() as test_dir:  # write with the converter, then read it back
+            VggFace2Converter.convert(source_dataset, test_dir, save_images=False)
+            parsed_dataset = Dataset.import_from(test_dir, 'vgg_face2')
+
+            compare_datasets(self, source_dataset, parsed_dataset)  # parsed result must match the source
+
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'vgg_face2_dataset')
 
 class VggFace2ImporterTest(TestCase):