Support more image formats in ImageNet (cvat-ai#85)

TOsmanov · Jan 14, 2021 · 9ff4611
1 parent 1ee908f
commit 9ff4611
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 11 deletions.
diff --git a/datumaro/plugins/imagenet_format.py b/datumaro/plugins/imagenet_format.py
@@ -15,7 +15,8 @@
 
 
 class ImagenetPath:
-    IMAGES_EXT = '.jpg'
+    DEFAULT_IMAGE_EXT = '.jpg'
+    IMAGE_EXT_FORMAT = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
     IMAGES_DIR_NO_LABEL = 'no_label'
 
 
@@ -37,7 +38,7 @@ def _load_categories(self, path):
     def _load_items(self, path):
         items = {}
         for image_path in glob(osp.join(path, '*', '*')):
-            if osp.splitext(image_path)[1] != ImagenetPath.IMAGES_EXT:
+            if osp.splitext(image_path)[1] not in ImagenetPath.IMAGE_EXT_FORMAT:
                 continue
             label = osp.basename(osp.dirname(image_path))
             image_name = osp.splitext(osp.basename(image_path))[0][len(label) + 1:]
@@ -62,7 +63,7 @@ def find_sources(cls, path):
 
 
 class ImagenetConverter(Converter):
-    DEFAULT_IMAGE_EXT = ImagenetPath.IMAGES_EXT
+    DEFAULT_IMAGE_EXT = ImagenetPath.DEFAULT_IMAGE_EXT
 
     def apply(self):
         if 1 < len(self._extractor.subsets()):
@@ -79,12 +80,10 @@ def apply(self):
             for label in labels[image_name]:
                 label_name = extractor.categories()[AnnotationType.label][label].name
                 self._save_image(item, osp.join(subset_dir, label_name,
-                    '%s_%s%s' % \
-                    (label_name, image_name, ImagenetPath.IMAGES_EXT)
-                ))
+                    '%s_%s' %  (label_name, self._make_image_filename(item))))
 
             if not labels[image_name]:
                 self._save_image(item, osp.join(subset_dir,
                     ImagenetPath.IMAGES_DIR_NO_LABEL,
-                    ImagenetPath.IMAGES_DIR_NO_LABEL + '_' +
-                    image_name + ImagenetPath.IMAGES_EXT))
+                    ImagenetPath.IMAGES_DIR_NO_LABEL + '_'
+                    + self._make_image_filename(item)))
diff --git a/datumaro/plugins/imagenet_txt_format.py b/datumaro/plugins/imagenet_txt_format.py
@@ -14,6 +14,8 @@
 
 
 class ImagenetTxtPath:
+    DEFAULT_IMAGE_EXT = '.jpg'
+    IMAGE_EXT_FORMAT = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
     LABELS_FILE = 'synsets.txt'
     IMAGE_DIR = 'images'
 
@@ -56,9 +58,14 @@ def _load_items(self, path):
                         label < len(self._categories[AnnotationType.label]), \
                         "Image '%s': unknown label id '%s'" % (item_id, label)
                     anno.append(Label(label))
+                image_path = osp.join(self.image_dir, item_id +
+                    ImagenetTxtPath.DEFAULT_IMAGE_EXT)
+                for path in glob(osp.join(self.image_dir, item_id + '*')):
+                    if osp.splitext(path)[1] in ImagenetTxtPath.IMAGE_EXT_FORMAT:
+                        image_path = path
+                        break
                 items[item_id] = DatasetItem(id=item_id, subset=self._subset,
-                    image=osp.join(self.image_dir, item_id + '.jpg'),
-                    annotations=anno)
+                    image=image_path, annotations=anno)
         return items
 
 
@@ -75,7 +82,7 @@ def find_sources(cls, path):
 
 
 class ImagenetTxtConverter(Converter):
-    DEFAULT_IMAGE_EXT = '.jpg'
+    DEFAULT_IMAGE_EXT = ImagenetTxtPath.DEFAULT_IMAGE_EXT
 
     def apply(self):
         subset_dir = self._save_dir