Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance unit test for explore #1266

Merged
merged 2 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/1246>)
- Enhance RISE algorithm for explainable AI
(<https://github.com/openvinotoolkit/datumaro/pull/1263>)
- Enhance explore unit test to use real dataset from ImageNet
(<https://github.com/openvinotoolkit/datumaro/pull/1266>)

### Bug fixes
- Fix wrong example of Datumaro dataset creation in document
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
163 changes: 37 additions & 126 deletions tests/unit/test_explorer.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,24 @@
import os.path as osp
from copy import deepcopy
from functools import partial
from unittest import TestCase

import numpy as np

from datumaro.components.algorithms.hash_key_inference.explorer import Explorer
from datumaro.components.annotation import Caption, Label
from datumaro.components.annotation import AnnotationType, Caption, Label
from datumaro.components.dataset import Dataset
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.errors import MediaTypeError
from datumaro.components.media import Image
from datumaro.plugins.data_formats.datumaro.exporter import DatumaroExporter

from ..requirements import Requirements, mark_requirement

from tests.requirements import Requirements, mark_requirement
from tests.utils.assets import get_test_asset_path
from tests.utils.test_utils import TestDir


class ExplorerTest(TestCase):
@property
def test_dataset(self):
train_img = np.full((5, 5, 3), 255, dtype=np.uint8)
train_img[2, :] = 0
test_img = np.full((5, 5, 3), 0, dtype=np.uint8)
test_img[2, :] = 255

dataset = Dataset.from_iterable(
[
DatasetItem(
id=1,
subset="train",
media=Image.from_numpy(data=train_img),
annotations=[Label(1, id=1), Caption("cat")],
),
DatasetItem(
id=2,
subset="train",
media=Image.from_numpy(data=train_img),
annotations=[Label(1, id=1), Caption("cat")],
),
DatasetItem(
id=3,
subset="test",
media=Image.from_numpy(data=test_img),
annotations=[Label(2, id=2), Caption("dog")],
),
DatasetItem(
id=4,
subset="test",
media=Image.from_numpy(data=test_img),
annotations=[Label(2, id=2), Caption("dog")],
),
]
)
return dataset

@property
def test_dataset_black_white(self):
train_img = np.full((5, 5, 3), 255, dtype=np.uint8)
test_img = np.full((5, 5, 3), 0, dtype=np.uint8)

dataset = Dataset.from_iterable(
[
DatasetItem(
id=1,
subset="train",
media=Image.from_numpy(data=train_img),
annotations=[Label(1, id=1), Caption("cat")],
),
DatasetItem(
id=2,
subset="train",
media=Image.from_numpy(data=train_img),
annotations=[Label(1, id=1), Caption("cat")],
),
DatasetItem(
id=3,
subset="test",
media=Image.from_numpy(data=test_img),
annotations=[Label(2, id=2), Caption("dog")],
),
DatasetItem(
id=4,
subset="test",
media=Image.from_numpy(data=test_img),
annotations=[Label(2, id=2), Caption("dog")],
),
]
)
return dataset

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_explore_img_query(self):
"""
Expand All @@ -107,28 +36,14 @@ def test_explore_img_query(self):
2. Set Explorer and try explore_topk to find similar media of image query.
3. Check whether each result has the same subset as the query.
"""
with TestDir() as test_dir:
converter = partial(DatumaroExporter.convert, save_media=True)
converter(self.test_dataset, test_dir)
imported_dataset = Dataset.import_from(test_dir, "datumaro")
for i, item in enumerate(imported_dataset):
if i == 1:
query = item
explorer = Explorer(imported_dataset)
results = explorer.explore_topk(query, topk=2)

for item in results:
# There were two "train_img"s in "train" subset, and we queried "train_img"
self.assertEqual(query.subset, item.subset)

query_without_hash_key = deepcopy(item)
query_without_hash_key.annotations = []
dataset = Dataset.import_from(get_test_asset_path("explore_dataset"), "imagenet")
query_item = dataset[0]

results = explorer.explore_topk(query_without_hash_key, topk=2)
explorer = Explorer(dataset)
results = explorer.explore_topk(query_item, topk=3)

for item in results:
# There were two "train_img"s in "train" subset, and we queried "train_img"
self.assertEqual(query_without_hash_key.subset, item.subset)
for item in results:
self.assertEqual(query_item.annotations[0].label, item.annotations[0].label)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_explore_img_list_query(self):
Expand All @@ -147,18 +62,15 @@ def test_explore_img_list_query(self):
2. Set Explorer and try explore_topk to find similar media of image query.
3. Check whether each result has the same subset as the query.
"""
with TestDir() as test_dir:
converter = partial(DatumaroExporter.convert, save_media=True)
converter(self.test_dataset, test_dir)
imported_dataset = Dataset.import_from(test_dir, "datumaro")
query_list = []
for i, item in enumerate(imported_dataset):
if i in [1, 2]:
query_list.append(item)
explorer = Explorer(imported_dataset)
results = explorer.explore_topk(query_list, topk=2)

self.assertEqual(results[0].subset, results[1].subset)
dataset = Dataset.import_from(get_test_asset_path("explore_dataset"), "imagenet")
query_list = [dataset[i] for i in [0, 1]]

explorer = Explorer(dataset)
results = explorer.explore_topk(query_list, topk=3)

for item in results:
self.assertEqual(query_list[0].annotations[0].label, item.annotations[0].label)
self.assertEqual(query_list[1].annotations[0].label, item.annotations[0].label)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_explore_txt_query(self):
Expand All @@ -177,15 +89,15 @@ def test_explore_txt_query(self):
2. Set Explorer and try explore_topk to find similar media of text query.
3. Check whether each result has the same subset as the query.
"""
with TestDir() as test_dir:
converter = partial(DatumaroExporter.convert, save_media=True)
converter(self.test_dataset, test_dir)
imported_dataset = Dataset.import_from(test_dir, "datumaro")
explorer = Explorer(imported_dataset)
results = explorer.explore_topk(
"a photo of a upper white and bottom black background", topk=2
)
self.assertEqual(results[0].subset, results[1].subset)
dataset = Dataset.import_from(get_test_asset_path("explore_dataset"), "imagenet")
query_txt = "dog"
query_label, _ = dataset.categories()[AnnotationType.label].find(query_txt)

explorer = Explorer(dataset)
results = explorer.explore_topk(query_txt, topk=3)

for item in results:
self.assertEqual(query_label, item.annotations[0].label)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_explore_txt_list_query(self):
Expand All @@ -204,16 +116,15 @@ def test_explore_txt_list_query(self):
2. Set Explorer and try explore_topk to find similar media of list of text query.
3. Check whether each result has the same subset as the query.
"""
with TestDir() as test_dir:
converter = partial(DatumaroExporter.convert, save_media=True)
converter(self.test_dataset, test_dir)
imported_dataset = Dataset.import_from(test_dir, "datumaro")
explorer = Explorer(imported_dataset)
results = explorer.explore_topk(
["a photo of a upper white and bottom black background"],
topk=2,
)
self.assertEqual(results[0].subset, results[1].subset)
dataset = Dataset.import_from(get_test_asset_path("explore_dataset"), "imagenet")
query_list = ["dog", "fluffy"]
query_label, _ = dataset.categories()[AnnotationType.label].find(query_list[0])

explorer = Explorer(dataset)
results = explorer.explore_topk(query_list, topk=3)

for item in results:
self.assertEqual(query_label, item.annotations[0].label)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_multiframeimage_assert(self):
Expand Down
Loading