|
8 | 8 | import numpy as np
|
9 | 9 | import pytest
|
10 | 10 |
|
| 11 | +from datumaro.components.annotation import Bbox, Caption, Ellipse, Label, Mask, Points |
11 | 12 | from datumaro.components.dataset import Dataset
|
12 | 13 | from datumaro.components.dataset_base import DatasetItem
|
13 | 14 | from datumaro.components.errors import DatumaroError
|
14 | 15 | from datumaro.components.media import Image, PointCloud
|
15 |
| -from datumaro.components.operations import IMAGE_STATS_SCHEMA, compute_image_statistics |
| 16 | +from datumaro.components.operations import ( |
| 17 | + IMAGE_STATS_SCHEMA, |
| 18 | + compute_ann_statistics, |
| 19 | + compute_image_statistics, |
| 20 | +) |
16 | 21 |
|
17 | 22 | from tests.requirements import Requirements, mark_requirement
|
18 | 23 |
|
@@ -109,3 +114,298 @@ def test_invalid_media_type(
|
109 | 114 | with pytest.warns(UserWarning, match="only Image media_type is allowed"):
|
110 | 115 | actual = compute_image_statistics(fxt_point_cloud_dataset)
|
111 | 116 | assert actual["dataset"] == IMAGE_STATS_SCHEMA["dataset"]
|
| 117 | + |
| 118 | + |
class AnnStatisticsTest:
    """Checks the summary returned by ``compute_ann_statistics``.

    Every test builds a small in-memory dataset, spells out the statistics it
    expects and compares them with the computed result using plain equality.
    """

    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
    def test_stats(self):
        """Compare the statistics of a dataset that mixes annotation types."""
        label_names = ["label_%s" % idx for idx in range(4)]

        # Item with captions, a label, two boxes (one without a label),
        # points and a mask whose 3x3 corner is set to one.
        first_item = DatasetItem(
            id=1,
            media=Image.from_numpy(data=np.ones((5, 5, 3))),
            annotations=[
                Caption("hello"),
                Caption("world"),
                Label(2, attributes={"x": 1, "y": "2"}),
                Bbox(1, 2, 2, 2, label=2, attributes={"score": 0.5}),
                Bbox(5, 6, 2, 2, attributes={"x": 1, "y": "3", "occluded": True}),
                Points([1, 2, 2, 0, 1, 1], label=0),
                Mask(
                    label=3,
                    image=np.array(
                        [
                            [0, 0, 1, 1, 1],
                            [0, 0, 1, 1, 1],
                            [0, 0, 1, 1, 1],
                            [0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0],
                        ]
                    ),
                ),
            ],
        )

        # Item with a label, two boxes (one without a label) and an
        # ellipse that carries no label.
        second_item = DatasetItem(
            id=2,
            media=Image.from_numpy(data=np.ones((2, 4, 3))),
            annotations=[
                Label(2, attributes={"x": 2, "y": "2"}),
                Bbox(1, 2, 2, 2, label=3, attributes={"score": 0.5}),
                Bbox(5, 6, 2, 2, attributes={"x": 2, "y": "3", "occluded": False}),
                Ellipse(5, 6, 2, 2, attributes={"x": 2, "y": "3", "occluded": False}),
            ],
        )

        # The two trailing items have no annotations at all.
        dataset = Dataset.from_iterable(
            [
                first_item,
                second_item,
                DatasetItem(id=3),
                DatasetItem(id="2.2", media=Image.from_numpy(data=np.ones((2, 4, 3)))),
            ],
            categories=label_names,
        )

        # Start from the all-zero template and fill in the expected numbers.
        expected = self._get_stats_template(label_names)
        expected.update(
            {
                "images count": 4,
                "annotations count": 11,
                "unannotated images count": 2,
                "unannotated images": ["3", "2.2"],
            }
        )

        per_type_counts = {
            "label": 2,
            "bbox": 4,
            "mask": 1,
            "points": 1,
            "caption": 2,
            "ellipse": 1,
        }
        for type_name, type_count in per_type_counts.items():
            expected["annotations by type"][type_name]["count"] = type_count

        # Ten bins of width 0.5 covering the range from 4.0 up to 9.0.
        bin_edges = [4.0 + 0.5 * step for step in range(11)]
        bin_counts = [2, 0, 0, 0, 0, 0, 0, 0, 0, 1]
        area_distribution = [
            {"min": low, "max": high, "count": hits, "percent": hits / 3}
            for low, high, hits in zip(bin_edges, bin_edges[1:], bin_counts)
        ]

        expected["annotations"] = {
            "labels": {
                "count": 6,
                "distribution": {
                    "label_0": [1, 1 / 6],
                    "label_1": [0, 0.0],
                    "label_2": [3, 3 / 6],
                    "label_3": [2, 2 / 6],
                },
                "attributes": {
                    # annotations with no label are skipped
                    "x": {
                        "count": 2,
                        "values count": 2,
                        "values present": ["1", "2"],
                        "distribution": {
                            "1": [1, 1 / 2],
                            "2": [1, 1 / 2],
                        },
                    },
                    # annotations with no label are skipped
                    "y": {
                        "count": 2,
                        "values count": 1,
                        "values present": ["2"],
                        "distribution": {
                            "2": [2, 2 / 2],
                        },
                    },
                    # must not include "special" attributes like "occluded"
                },
            },
            "segments": {
                # two areas of 4 and one area of 9
                "avg. area": (4 * 2 + 9 * 1) / 3,
                "area distribution": area_distribution,
                "pixel distribution": {
                    "label_0": [0, 0.0],
                    "label_1": [0, 0.0],
                    "label_2": [4, 4 / 17],
                    "label_3": [13, 13 / 17],
                },
            },
        }

        actual = compute_ann_statistics(dataset)

        assert actual == expected

    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
    def test_stats_with_empty_dataset(self):
        """Items without annotations are reported as unannotated images."""
        label_names = ["label_%s" % idx for idx in range(4)]
        bare_items = [DatasetItem(id=1), DatasetItem(id=3)]
        dataset = Dataset.from_iterable(bare_items, categories=label_names)

        expected = self._get_stats_template(label_names)
        expected.update(
            {
                "images count": 2,
                "unannotated images count": 2,
                "unannotated images": ["1", "3"],
            }
        )

        actual = compute_ann_statistics(dataset)
        assert actual == expected

    @mark_requirement(Requirements.DATUM_BUG_1204)
    def test_stats_with_invalid_label(self):
        """A label index missing from the categories is keyed by its raw index."""
        label_names = ["label_%s" % idx for idx in range(3)]
        # Four items labelled 0..3 while only three categories are declared.
        labelled_items = [DatasetItem(id=f"item{i}", annotations=[Label(i)]) for i in range(4)]
        dataset = Dataset.from_iterable(iterable=labelled_items, categories=label_names)

        expected = self._get_stats_template(label_names)
        expected["images count"] = 4
        expected["annotations count"] = 4
        expected["annotations by type"]["label"]["count"] = 4

        label_stats = expected["annotations"]["labels"]
        label_stats["count"] = 4
        label_stats["distribution"] = {name: [1, 0.25] for name in label_names}
        label_stats["distribution"][3] = [1, 0.25]  # label which does not exist in categories.

        actual = compute_ann_statistics(dataset)

        assert actual == expected

    @staticmethod
    def _get_stats_template(label_names: list):
        """Build an all-zero statistics dictionary for the given label names.

        Args:
            label_names: Names used as keys of the label and pixel
                distributions.

        Returns:
            A freshly built nested dictionary; callers may mutate it freely.
        """
        annotation_types = (
            "label",
            "polygon",
            "polyline",
            "bbox",
            "mask",
            "points",
            "caption",
            "cuboid_3d",
            "super_resolution_annotation",
            "depth_annotation",
            "ellipse",
            "hash_key",
            "feature_vector",
            "tabular",
            "unknown",
        )
        # A separate {"count": 0} dict per type, so one entry can be
        # updated without touching the others.
        by_type = {type_name: {"count": 0} for type_name in annotation_types}

        label_section = {
            "count": 0,
            "distribution": {n: [0, 0] for n in label_names},
            "attributes": {},
        }
        segment_section = {
            "avg. area": 0.0,
            "area distribution": [],
            "pixel distribution": {n: [0, 0] for n in label_names},
        }

        return {
            "images count": 0,
            "annotations count": 0,
            "unannotated images count": 0,
            "unannotated images": [],
            "annotations by type": by_type,
            "annotations": {
                "labels": label_section,
                "segments": segment_section,
            },
        }
0 commit comments