Commit

Introduce min_samples param to dataloader_params to allow oversampling on small number of samples (#838)
BloodAxe authored Apr 17, 2023
1 parent 0fe46cd commit 91b5232
Showing 4 changed files with 41 additions and 6 deletions.
17 changes: 17 additions & 0 deletions documentation/source/Data.md
@@ -189,6 +189,23 @@ dataset_params:
```

### DataLoaders - Additional params

In addition to the parameters supported by the `torch.utils.data.DataLoader` class, SuperGradients also provides additional parameters:

* `min_samples` - When present, this parameter guarantees that at least `min_samples` items will be processed in each epoch. It is useful when working with small datasets.
To use this option, simply add it to the `dataloader_params` dictionary and set it to the desired value:
```yaml
train_dataloader: imagenet_resnet50_train
dataset_params:
train_dataloader_params:
batch_size: 4
shuffle: True
min_samples: 1024
```

On the technical side, when this parameter is set, SuperGradients attaches a `RandomSampler` to the DataLoader and sets its `num_samples` parameter to `min_samples`.
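
For illustration only, here is a minimal sketch of the equivalent behavior in plain PyTorch. The toy dataset and the sizes below are invented for the example and are not part of SuperGradients:

```python
import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

# A toy dataset of 20 samples, standing in for a small training set.
dataset = TensorDataset(torch.randn(20, 3), torch.randint(0, 2, (20,)))

min_samples = 1024  # same value as in the YAML example above

# Sampling with replacement lets one epoch draw more items than the dataset holds.
sampler = RandomSampler(dataset, replacement=True, num_samples=min_samples)

# A custom sampler and `shuffle` are mutually exclusive, so `shuffle` is not passed here.
loader = DataLoader(dataset, batch_size=4, sampler=sampler)

print(len(loader))  # 256 batches per epoch (1024 / 4), even though the dataset has only 20 samples
```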

## Using Custom Datasets

Suppose we already have our own `torch.utils.data.Dataset` class:
17 changes: 16 additions & 1 deletion src/super_gradients/training/dataloaders/dataloaders.py
@@ -3,7 +3,7 @@
import hydra
import numpy as np
import torch
from torch.utils.data import BatchSampler, DataLoader, TensorDataset
from torch.utils.data import BatchSampler, DataLoader, TensorDataset, RandomSampler

import super_gradients
from super_gradients.common.abstractions.abstract_logger import get_logger
@@ -96,6 +96,14 @@ def _process_dataloader_params(cfg, dataloader_params, dataset, train):
dataloader_params = _process_sampler_params(dataloader_params, dataset, default_dataloader_params)
dataloader_params = _process_collate_fn_params(dataloader_params)

# The following check is needed to gracefully handle the rare but possible case when the dataset length
# is less than the number of workers. In this case DataLoader will crash.
# So we clamp the number of workers to not exceed the dataset length.
num_workers = get_param(dataloader_params, "num_workers")
if num_workers is not None and num_workers > 0:
num_workers = min(num_workers, len(dataset))
dataloader_params["num_workers"] = num_workers

return dataloader_params


@@ -114,6 +122,13 @@ def _process_sampler_params(dataloader_params, dataset, default_dataloader_param
elif is_dist:
dataloader_params["sampler"] = {"DistributedSampler": {}}
dataloader_params = _instantiate_sampler(dataset, dataloader_params)
elif get_param(dataloader_params, "min_samples") is not None:
min_samples = dataloader_params.pop("min_samples")
if len(dataset) < min_samples:
dataloader_params["sampler"] = RandomSampler(dataset, replacement=True, num_samples=min_samples)
if "shuffle" in dataloader_params.keys():
dataloader_params.pop("shuffle")
logger.info(f"Using min_samples={min_samples}")
if get_param(dataloader_params, "batch_sampler"):
sampler = dataloader_params.pop("sampler")
batch_size = dataloader_params.pop("batch_size")
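For context, here is a small, self-contained sketch of the two safeguards added in this file, applied to a plain dict of DataLoader kwargs. The helper name `clamp_dataloader_params` and the toy dataset are invented for illustration and are not part of the SuperGradients API:

```python
from typing import Any, Dict

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset


def clamp_dataloader_params(params: Dict[str, Any], dataset) -> Dict[str, Any]:
    """Illustrative re-implementation of the safeguards from the diff above."""
    params = dict(params)

    # Clamp workers so a tiny dataset never has more workers than samples.
    num_workers = params.get("num_workers")
    if num_workers is not None and num_workers > 0:
        params["num_workers"] = min(num_workers, len(dataset))

    # If min_samples exceeds the dataset size, oversample with replacement.
    min_samples = params.pop("min_samples", None)
    if min_samples is not None and len(dataset) < min_samples:
        params["sampler"] = RandomSampler(dataset, replacement=True, num_samples=min_samples)
        params.pop("shuffle", None)  # a custom sampler and shuffle are mutually exclusive

    return params


dataset = TensorDataset(torch.arange(10).float())  # only 10 samples
params = clamp_dataloader_params({"batch_size": 4, "num_workers": 8, "shuffle": True, "min_samples": 100}, dataset)
loader = DataLoader(dataset, **params)
print(len(loader))  # 25 batches: 100 oversampled items / batch size of 4
```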
3 changes: 2 additions & 1 deletion src/super_gradients/training/sg_trainer/sg_trainer.py
@@ -1213,7 +1213,8 @@ def forward(self, inputs, targets):
num_gpus=get_world_size(),
batch_size=len(inputs),
batch_accumulate=self.batch_accumulate,
len_train_set=len(self.train_loader.dataset),
train_dataset_length=len(self.train_loader.dataset),
train_dataloader_len=len(self.train_loader),
)

try:
10 changes: 6 additions & 4 deletions src/super_gradients/training/utils/sg_trainer_utils.py
@@ -449,18 +449,20 @@ def get_callable_param_names(obj: callable) -> Tuple[str]:
return tuple(inspect.signature(obj).parameters)


def log_main_training_params(multi_gpu: MultiGPUMode, num_gpus: int, batch_size: int, batch_accumulate: int, len_train_set: int):
def log_main_training_params(
multi_gpu: MultiGPUMode, num_gpus: int, batch_size: int, batch_accumulate: int, train_dataset_length: int, train_dataloader_len: int
):
"""Log training parameters"""
msg = (
"TRAINING PARAMETERS:\n"
f" - Mode: {multi_gpu.name if multi_gpu else 'Single GPU'}\n"
f" - Number of GPUs: {num_gpus if 'cuda' in device_config.device else 0:<10} ({torch.cuda.device_count()} available on the machine)\n"
f" - Dataset size: {len_train_set:<10} (len(train_set))\n"
f" - Dataset size: {train_dataset_length:<10} (len(train_set))\n"
f" - Batch size per GPU: {batch_size:<10} (batch_size)\n"
f" - Batch Accumulate: {batch_accumulate:<10} (batch_accumulate)\n"
f" - Total batch size: {num_gpus * batch_size:<10} (num_gpus * batch_size)\n"
f" - Effective Batch size: {num_gpus * batch_size * batch_accumulate:<10} (num_gpus * batch_size * batch_accumulate)\n"
f" - Iterations per epoch: {int(len_train_set / (num_gpus * batch_size)):<10} (len(train_set) / total_batch_size)\n"
f" - Gradient updates per epoch: {int(len_train_set / (num_gpus * batch_size * batch_accumulate)):<10} (len(train_set) / effective_batch_size)\n"
f" - Iterations per epoch: {int(train_dataloader_len):<10} (len(train_loader))\n"
f" - Gradient updates per epoch: {int(train_dataloader_len / batch_accumulate):<10} (len(train_loader) / batch_accumulate)\n"
)
logger.info(msg)
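
A quick worked example of why the logged figures changed (all numbers invented for illustration): once `min_samples` oversampling is in play, `len(train_loader)` no longer equals `len(train_set) / (num_gpus * batch_size)`, so the dataloader length is the reliable basis for the per-epoch counts.

```python
import math

# Invented example numbers, not taken from the commit.
dataset_len = 20        # tiny dataset
min_samples = 1024      # oversampling target
batch_size = 4
num_gpus = 1
batch_accumulate = 2

# Old formula: derived from the raw dataset size.
old_iterations = int(dataset_len / (num_gpus * batch_size))        # 5

# New formula: derived from the dataloader length, which reflects the sampler.
train_dataloader_len = math.ceil(min_samples / batch_size)         # 256
iterations_per_epoch = train_dataloader_len                        # 256
gradient_updates = int(train_dataloader_len / batch_accumulate)    # 128

print(old_iterations, iterations_per_epoch, gradient_updates)      # 5 256 128
```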
