openvinotoolkit · sooahleex · Nov 1, 2022 · Oct 24, 2022 · Oct 24, 2022 · Oct 25, 2022
@@ -12,13 +12,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/742>)
 - Add jupyter sample introducing how to merge datasets
   (<https://github.com/openvinotoolkit/datumaro/pull/738>)
+  - introducing how to filter a dataset
+    (<https://github.com/openvinotoolkit/datumaro/pull/748>)
 - Add Visualization Python API
   - Bbox
     (<https://github.com/openvinotoolkit/datumaro/pull/744>)
   - Add Label, Points, Polygon, PolyLine, and Caption visualization features
     (<https://github.com/openvinotoolkit/datumaro/pull/746>)
 - Add a documentation tab menu for Python API
   (<https://github.com/openvinotoolkit/datumaro/pull/753>)
+- Add `__repr__` for Dataset
+  (<https://github.com/openvinotoolkit/datumaro/pull/750>)
 
 ### Changed
 - Updated `networkx` version to 2.6

@@ -114,9 +114,16 @@ def get_subset(self, name):
     def subsets(self):
         return self.data
 
-    def get_annotated_size(self):
+    def get_annotated_items(self):
         return sum(bool(s.annotations) for s in self._traversal_order.values())
 
+    def get_annotations(self):
+        annotations_by_type = {t.name: {"count": 0} for t in AnnotationType}
+        for item in self._traversal_order.values():
+            for ann in item.annotations:
+                annotations_by_type[ann.type.name]["count"] += 1
+        return sum(t["count"] for t in annotations_by_type.values())
+
     def __copy__(self):
         copied = DatasetItemStorage()
         copied._traversal_order = copy(self._traversal_order)
@@ -282,9 +289,16 @@ def categories(self):
     def media_type(self):
         return self.parent.media_type()
 
-    def get_annotated_size(self):
+    def get_annotated_items(self):
         return sum(bool(s.annotations) for s in self.parent._data.get_subset(self.name))
 
+    def get_annotations(self):
+        annotations_by_type = {t.name: {"count": 0} for t in AnnotationType}
+        for item in self.parent._data.get_subset(self.name):
+            for ann in item.annotations:
+                annotations_by_type[ann.type.name]["count"] += 1
+        return sum(t["count"] for t in annotations_by_type.values())
+
     def get_annotated_type(self):
         annotation_types = []
         for item in self.parent._data.get_subset(self.name):
@@ -631,8 +645,11 @@ def subsets(self):
         # and other cases
         return self._merged().subsets()
 
-    def get_annotated_size(self):
-        return self._storage.get_annotated_size()
+    def get_annotated_items(self):
+        return self._storage.get_annotated_items()
+
+    def get_annotations(self):
+        return self._storage.get_annotations()
 
     def transform(self, method: Type[Transform], *args, **kwargs):
         # Flush accumulated changes
@@ -829,7 +846,8 @@ def __repr__(self) -> str:
             f"\tsize={len(self._data)}\n"
             f"\tsource_path={self._source_path}\n"
             f"\tmedia_type={self.media_type()}\n"
-            f"\tannotated_count={self.get_annotated_size()}\n"
+            f"\tannotated_items_count={self.get_annotated_items()}\n"
+            f"\tannotations_count={self.get_annotations()}\n"
             f"subsets\n"
             f"\t{separator.join(self.get_subset_info())}"
             f"categories\n"
@@ -863,13 +881,17 @@ def media_type(self) -> Type[MediaElement]:
     def get(self, id: str, subset: Optional[str] = None) -> Optional[DatasetItem]:
         return self._data.get(id, subset)
 
-    def get_annotated_size(self):
-        return self._data.get_annotated_size()
+    def get_annotated_items(self):
+        return self._data.get_annotated_items()
+
+    def get_annotations(self):
+        return self._data.get_annotations()
 
     def get_subset_info(self):
         return (
             f"{subset_name}: # of items={len(self.get_subset(subset_name))}, "
-            f"# of annotations={self.get_subset(subset_name).get_annotated_size()}, "
+            f"# of annotated items={self.get_subset(subset_name).get_annotated_items()}, "
+            f"# of annotations={self.get_subset(subset_name).get_annotations()}, "
             f"annotation types={self.get_subset(subset_name).get_annotated_type()}\n"
             for subset_name in sorted(self.subsets().keys())
         )