Commit fef6347: wip
Louis-Dupont committed Nov 21, 2023
1 parent c5a7e05
Showing 4 changed files with 45 additions and 149 deletions.
@@ -48,5 +48,4 @@ def __call__(self, samples: Iterable[SupportedDataType]) -> Tuple[torch.Tensor,
images, targets = super().__call__(samples=samples) # This already returns a batch of (images, targets)
transform = SegmentationDataSet.get_normalize_transform()
images = transform(images / 255) # images are [0-255] after the data adapter
targets = targets.argmax(1)
return images, targets
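
For context, a minimal sketch of the normalization step this collate performs, assuming SegmentationDataSet.get_normalize_transform() returns a standard torchvision Normalize (the exact mean/std live in the library; ImageNet values are assumed here):

import torch
from torchvision import transforms

# Hypothetical stand-in for SegmentationDataSet.get_normalize_transform().
transform = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

images = torch.randint(0, 256, (4, 3, 512, 512)).float()  # [0-255], as returned by the data adapter
images = transform(images / 255)  # scale to [0-1], then normalize per channel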
17 changes: 13 additions & 4 deletions tests/integration_tests/data_adapter/test_dataloader_adapter.py
@@ -1,3 +1,4 @@
import os.path
import unittest
import tempfile
import shutil
@@ -10,6 +11,7 @@
from data_gradients.managers.segmentation_manager import SegmentationAnalysisManager
from data_gradients.managers.classification_manager import ClassificationAnalysisManager
from data_gradients.dataset_adapters.config.data_config import SegmentationDataConfig
from data_gradients.utils.data_classes.image_channels import ImageChannels

from super_gradients.training.dataloaders.adapters import (
DetectionDataloaderAdapterFactory,
@@ -20,7 +22,10 @@

class DataloaderAdapterTest(unittest.TestCase):
def setUp(self) -> None:
self.tmp_dir = tempfile.mkdtemp()
if os.getenv("DEBUG_DIR"): # This is useful when debugging locally, to avoid downloading the dataset everytime
self.tmp_dir = os.path.join(os.getenv("DEBUG_DIR"), "DataloaderAdapterNonRegressionTest")
else:
self.tmp_dir = tempfile.mkdtemp()

def tearDown(self):
shutil.rmtree(self.tmp_dir)
@@ -38,8 +43,10 @@ def __call__(self, pic):
log_dir=self.tmp_dir,
report_title="Caltech101",
class_names=train_set.categories,
image_channels=ImageChannels.from_str("RGB"),
is_batch=False,
labels_extractor="[1]", # dataset returns (image, label)
batches_early_stop=4,
n_image_channels=3,
use_cache=True,
)
analyzer.run()
@@ -121,7 +128,8 @@ def voc_format_to_bbox(sample: tuple) -> np.ndarray:
train_data=train_set,
val_data=val_set,
labels_extractor=voc_format_to_bbox,
class_names=PASCAL_VOC_CLASS_NAMES,
class_names=list(PASCAL_VOC_CLASS_NAMES),
image_channels=ImageChannels.from_str("RGB"),
# class_names=train_set,
batches_early_stop=20,
use_cache=True, # With this we will be asked about the dataset information only once
@@ -181,7 +189,8 @@ def test_torchvision_segmentation(self):
log_dir=self.tmp_dir,
train_data=train_set,
val_data=val_set,
class_names=list(range(256)),
class_names=[f"class_{i}" for i in range(256)],
image_channels=ImageChannels.from_str("RGB"),
# class_names=train_set,
batches_early_stop=20,
use_cache=True, # With this we will be asked about the dataset information only once
@@ -7,6 +7,7 @@
from data_gradients.managers.detection_manager import DetectionAnalysisManager
from data_gradients.managers.segmentation_manager import SegmentationAnalysisManager
from data_gradients.managers.classification_manager import ClassificationAnalysisManager
from data_gradients.utils.data_classes.image_channels import ImageChannels

from super_gradients.training.dataloaders.dataloaders import coco2017_val, cityscapes_stdc_seg50_val, cifar10_val
from super_gradients.training.dataloaders.adapters import (
@@ -36,6 +37,7 @@ def test_adapter_on_coco2017_val(self):
train_data=loader,
val_data=loader,
class_names=loader.dataset.classes,
image_channels=ImageChannels.from_str("RGB"),
batches_early_stop=20,
use_cache=True, # With this we will be asked about the data information only once
bbox_format="cxcywh",
@@ -60,6 +62,7 @@ def test_adapter_on_cityscapes_stdc_seg50_val(self):
train_data=loader,
val_data=loader,
class_names=loader.dataset.classes + ["<unknown>"],
image_channels=ImageChannels.from_str("RGB"),
batches_early_stop=1,
use_cache=True, # With this we will be asked about the data information only once
)
@@ -69,6 +72,7 @@ def test_adapter_on_cityscapes_stdc_seg50_val(self):

for (adapted_images, adapted_targets), (images, targets) in zip(adapted_loader, loader):
assert np.isclose(adapted_targets, targets).all()
assert np.isclose(adapted_images, images).all()
os.remove(analyzer.data_config.cache_path)

def test_adapter_on_cifar10_val(self):
@@ -81,6 +85,7 @@
train_data=loader,
val_data=loader,
class_names=list(range(10)),
image_channels=ImageChannels.from_str("RGB"),
batches_early_stop=20,
use_cache=True, # With this we will be asked about the data information only once
)
@@ -93,88 +98,31 @@ def test_adapter_on_cifar10_val(self):
assert np.isclose(adapted_images, images).all()
os.remove(analyzer.data_config.cache_path)

def test_adpter_from_dict(self):
def test_ddp_python_based_adapter(self):
# setup_device(num_gpus=3)

# We use Validation set because it does not include augmentation (which is random and makes it impossible to compare results)
loader = coco2017_val(
dataset_params={"max_num_samples": 500, "with_crowd": False},
dataloader_params={"collate_fn": "DetectionCollateFN"},
) # `max_num_samples` To make it faster
loader = cifar10_val(dataset_params={"transforms": ["ToTensor"]})

analyzer = DetectionAnalysisManager(
report_title="coco2017_val_dict",
analyzer = ClassificationAnalysisManager(
report_title="test_python_classification",
log_dir=self.tmp_dir,
train_data=loader,
val_data=loader,
class_names=loader.dataset.classes,
class_names=list(range(10)),
image_channels=ImageChannels.from_str("RGB"),
batches_early_stop=20,
use_cache=True, # With this we will be asked about the data information only once
bbox_format="cxcywh",
is_label_first=True,
)
analyzer.run()

# Here we mimic how it works when loading from a recipe
adapted_loader = coco2017_val(
dataset_params={"max_num_samples": 500, "with_crowd": False},
dataloader_params={
"collate_fn": {
"DetectionDatasetAdapterCollateFN": {
"base_collate_fn": "DetectionCollateFN",
"config_path": analyzer.data_config.cache_path,
}
}
},
)
adapted_loader = ClassificationDataloaderAdapterFactory.from_dataloader(dataloader=loader, config_path=analyzer.data_config.cache_path)

for (adapted_images, adapted_targets), (images, targets) in zip(adapted_loader, loader):
assert np.isclose(adapted_targets, targets).all()
assert np.isclose(adapted_images, images).all()
os.remove(analyzer.data_config.cache_path)

def test_ddp_from_dict_based_adapter(self):
# setup_device(num_gpus=3)

# We use Validation set because it does not include augmentation (which is random and makes it impossible to compare results)
loader = coco2017_val(
dataset_params={"max_num_samples": 500, "with_crowd": False},
dataloader_params={"num_workers": 4, "collate_fn": "DetectionCollateFN"},
)

# We use Validation set because it does not include augmentation (which is random and makes it impossible to compare results)
adapted_loader = coco2017_val(
dataset_params={"max_num_samples": 500, "with_crowd": False}, # `max_num_samples` To make it faster
dataloader_params={
"num_workers": 4,
"collate_fn": {
"DetectionDatasetAdapterCollateFN": {
"base_collate_fn": "DetectionCollateFN",
"config_path": os.path.join(self.tmp_dir, "test_ddp_from_dict_based_adapter.json"),
}
},
},
)

for (adapted_images, adapted_targets), (images, targets) in zip(adapted_loader, loader):
assert np.isclose(adapted_targets, targets).all()
assert np.isclose(adapted_images, images).all()

def test_ddp_python_based_adapter(self):
# setup_device(num_gpus=3)

# We use Validation set because it does not include augmentation (which is random and makes it impossible to compare results)
loader = coco2017_val(
dataset_params={"max_num_samples": 500, "with_crowd": False}, # `max_num_samples` To make it faster
dataloader_params={"num_workers": 4, "collate_fn": "DetectionCollateFN"},
)
adapted_loader = DetectionDataloaderAdapterFactory.from_dataloader(
dataloader=loader,
config_path=os.path.join(self.tmp_dir, "test_ddp_python_based_adapter.json"),
)

for (adapted_images, adapted_targets), (images, targets) in zip(adapted_loader, loader):
assert np.isclose(adapted_targets, targets).all()
assert np.isclose(adapted_images, images).all()


if __name__ == "__main__":
unittest.main()
96 changes: 18 additions & 78 deletions tests/unit_tests/test_data_adapters.py
@@ -7,10 +7,12 @@
from data_gradients.managers.detection_manager import DetectionAnalysisManager
from data_gradients.managers.segmentation_manager import SegmentationAnalysisManager
from data_gradients.managers.classification_manager import ClassificationAnalysisManager
from super_gradients.training.utils.collate_fn import (
DetectionDatasetAdapterCollateFN,
SegmentationDatasetAdapterCollateFN,
ClassificationDatasetAdapterCollateFN,
from data_gradients.utils.data_classes.image_channels import ImageChannels

from super_gradients.training.dataloaders.adapters import (
ClassificationDataloaderAdapterFactory,
DetectionDataloaderAdapterFactory,
SegmentationDataloaderAdapterFactory,
)


@@ -74,33 +76,14 @@ def test_adapt_dataset_detection(self):
train_data=self.dataset,
val_data=self.dataset,
class_names=list(map(str, range(6))),
image_channels=ImageChannels.from_str("RGB"),
use_cache=True,
is_label_first=False,
bbox_format="xywh",
)
analyzer_ds.run() # Run the analysis. This will create the cache.

loader = DataLoader(self.dataset, batch_size=2, collate_fn=DetectionDatasetAdapterCollateFN(config_path=analyzer_ds.config.cache_path, n_classes=6))

for expected_images_shape, expected_targets, (images, targets) in zip(self.expected_image_shapes_batches, self.expected_targets_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
self.assertTrue(((0 <= images) & (images <= 255)).all()) # Should be 0-255
self.assertTrue(torch.equal(targets, expected_targets))

def test_overriding_collate_detection(self):
loader = DataLoader(self.dataset, batch_size=2)
analyzer_ds = DetectionAnalysisManager(
report_title="test_overriding_collate_detection",
train_data=loader,
val_data=loader,
class_names=list(map(str, range(6))),
use_cache=True,
is_label_first=False,
bbox_format="xywh",
)
analyzer_ds.run() # Run the analysis. This will create the cache.

loader.collate_fn = DetectionDatasetAdapterCollateFN(collate_fn=loader.collate_fn, config_path=analyzer_ds.config.cache_path, n_classes=6)
loader = DetectionDataloaderAdapterFactory.from_dataset(dataset=self.dataset, config_path=analyzer_ds.data_config.cache_path, batch_size=2)

for expected_images_shape, expected_targets, (images, targets) in zip(self.expected_image_shapes_batches, self.expected_targets_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
@@ -116,13 +99,14 @@ def test_adapt_dataloader_detection(self):
train_data=loader,
val_data=loader,
class_names=list(map(str, range(6))),
image_channels=ImageChannels.from_str("RGB"),
use_cache=True,
is_label_first=False,
bbox_format="xywh",
)
analyzer_ds.run()

loader = DetectionDatasetAdapterCollateFN.adapt_dataloader(dataloader=loader, config_path=analyzer_ds.config.cache_path, n_classes=6)
loader = DetectionDataloaderAdapterFactory.from_dataloader(dataloader=loader, config_path=analyzer_ds.data_config.cache_path)

for expected_images_shape, expected_targets, (images, targets) in zip(self.expected_image_shapes_batches, self.expected_targets_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
@@ -153,32 +137,13 @@ def test_adapt_dataset_segmentation(self):
train_data=self.dataset,
val_data=self.dataset,
class_names=list(map(str, range(6))),
image_channels=ImageChannels.from_str("RGB"),
use_cache=True,
is_batch=False,
)
analyzer_ds.run()

loader = DataLoader(self.dataset, batch_size=2, collate_fn=SegmentationDatasetAdapterCollateFN(config_path=analyzer_ds.config.cache_path, n_classes=6))

for expected_images_shape, expected_masks, (images, masks) in zip(self.expected_image_shapes_batches, self.expected_masks_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
self.assertTrue((masks == expected_masks).all()) # Checking that the masks are as expected

def test_overriding_collate_segmentation(self):
loader = DataLoader(self.dataset, batch_size=2)

# Run the analysis on DATALOADER
analyzer_ds = SegmentationAnalysisManager(
report_title="test_overriding_collate_segmentation",
train_data=loader,
val_data=loader,
class_names=list(map(str, range(6))),
use_cache=True,
is_batch=True,
)
analyzer_ds.run()

loader.collate_fn = SegmentationDatasetAdapterCollateFN(base_collate_fn=loader.collate_fn, config_path=analyzer_ds.config.cache_path, n_classes=6)
loader = SegmentationDataloaderAdapterFactory.from_dataset(dataset=self.dataset, config_path=analyzer_ds.data_config.cache_path, batch_size=2)

for expected_images_shape, expected_masks, (images, masks) in zip(self.expected_image_shapes_batches, self.expected_masks_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
@@ -193,13 +158,13 @@ def test_adapt_dataloader_segmentation(self):
train_data=loader,
val_data=loader,
class_names=list(map(str, range(6))),
image_channels=ImageChannels.from_str("RGB"),
use_cache=True,
is_batch=True,
)
analyzer_ds.run()

# This is required to use the adapter inside the existing Dataloader.
loader = SegmentationDatasetAdapterCollateFN.adapt_dataloader(dataloader=loader, config_path=analyzer_ds.config.cache_path, n_classes=6)
loader = SegmentationDataloaderAdapterFactory.from_dataloader(dataloader=loader, config_path=analyzer_ds.data_config.cache_path)

for expected_images_shape, expected_masks, (images, masks) in zip(self.expected_image_shapes_batches, self.expected_masks_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
@@ -227,40 +192,15 @@ def test_adapt_dataset_classification(self):
train_data=self.dataset,
val_data=self.dataset,
class_names=list(map(str, range(6))),
image_channels=ImageChannels.from_str("RGB"),
images_extractor="[0]",
labels_extractor="[1]",
use_cache=True,
is_batch=False,
)
analyzer_ds.run()

loader = DataLoader(
self.dataset, batch_size=2, collate_fn=ClassificationDatasetAdapterCollateFN(config_path=analyzer_ds.config.cache_path, n_classes=6)
)

for expected_images_shape, expected_labels, (images, labels) in zip(self.expected_image_shapes_batches, self.expected_labels_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
self.assertTrue(torch.equal(labels, expected_labels))

def test_adapt_dataloader_override_collate_classification(self):

loader = DataLoader(self.dataset, batch_size=2)

analyzer_ds = ClassificationAnalysisManager(
report_title="test_adapt_dataloader_override_collate_classification",
train_data=loader,
val_data=loader,
class_names=list(map(str, range(6))),
images_extractor="[0]",
labels_extractor="[1]",
use_cache=True,
is_batch=True,
)
analyzer_ds.run()

# This is required to use the adapter inside the existing Dataloader.
# `base_collate_fn=loader.base_collate_fn` ensure to still take into account any collate_fn that was passed to the Dataloader
loader.collate_fn = ClassificationDatasetAdapterCollateFN(base_collate_fn=loader.collate_fn, config_path=analyzer_ds.config.cache_path, n_classes=6)
loader = ClassificationDataloaderAdapterFactory.from_dataset(dataset=self.dataset, config_path=analyzer_ds.data_config.cache_path, batch_size=2)

for expected_images_shape, expected_labels, (images, labels) in zip(self.expected_image_shapes_batches, self.expected_labels_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
@@ -275,15 +215,15 @@ def test_adapt_dataloader_classification(self):
train_data=loader,
val_data=loader,
class_names=list(map(str, range(6))),
image_channels=ImageChannels.from_str("RGB"),
images_extractor="[0]",
labels_extractor="[1]",
use_cache=True,
is_batch=True,
)
analyzer_ds.run()

# This is required to use the adapter inside the existing Dataloader.
loader = ClassificationDatasetAdapterCollateFN.adapt_dataloader(dataloader=loader, config_path=analyzer_ds.config.cache_path, n_classes=6)
loader = ClassificationDataloaderAdapterFactory.from_dataloader(dataloader=loader, config_path=analyzer_ds.data_config.cache_path)

for expected_images_shape, expected_labels, (images, labels) in zip(self.expected_image_shapes_batches, self.expected_labels_batches, loader):
self.assertEqual(images.shape, expected_images_shape)
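Taken together, the updated tests converge on one two-step workflow: run an analysis manager once to cache the inferred dataset layout, then hand that cache to a dataloader-adapter factory. A minimal detection sketch of that pattern (my_dataset, the report title, and the class names are placeholders, not from this commit):

from torch.utils.data import DataLoader

from data_gradients.managers.detection_manager import DetectionAnalysisManager
from data_gradients.utils.data_classes.image_channels import ImageChannels
from super_gradients.training.dataloaders.adapters import DetectionDataloaderAdapterFactory

loader = DataLoader(my_dataset, batch_size=2)  # my_dataset is a placeholder dataset

# Step 1: analyze once; with use_cache=True the inferred layout is written to disk.
analyzer = DetectionAnalysisManager(
    report_title="my_report",
    train_data=loader,
    val_data=loader,
    class_names=["cat", "dog"],
    image_channels=ImageChannels.from_str("RGB"),  # replaces the removed n_image_channels=3
    use_cache=True,
    is_label_first=False,
    bbox_format="xywh",
)
analyzer.run()

# Step 2: wrap the same dataloader using the cached config; this replaces the
# removed *DatasetAdapterCollateFN(config_path=..., n_classes=...) pattern.
adapted_loader = DetectionDataloaderAdapterFactory.from_dataloader(
    dataloader=loader,
    config_path=analyzer.data_config.cache_path,
)

The factories also expose from_dataset(dataset=..., config_path=..., batch_size=...), as the unit tests above use, for when no dataloader exists yet.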
