Fix bugs in scale_batch_size #2523

Closed · wants to merge 5 commits
37 changes: 31 additions & 6 deletions pytorch_lightning/trainer/training_tricks.py
@@ -13,6 +13,7 @@
 from pytorch_lightning.core.lightning import LightningModule
 from pytorch_lightning.callbacks import GradientAccumulationScheduler
 from pytorch_lightning.loggers.base import DummyLogger
+from pytorch_lightning.utilities import rank_zero_warn
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.memory import is_oom_error, garbage_collection_cuda

@@ -108,7 +109,7 @@ def scale_batch_size(self,
                          model: LightningModule,
                          mode: str = 'power',
                          steps_per_trial: int = 3,
-                         init_val: int = 2,
+                         init_val: int = 0,
                          max_trials: int = 25,
                          batch_arg_name: str = 'batch_size'):
         r"""
@@ -135,8 +136,14 @@ def scale_batch_size(self,
                algorithm is terminated

         """
-        if not hasattr(model, batch_arg_name):
-            raise MisconfigurationException(f'Field {batch_arg_name} not found in `model.hparams`')
+        if not hasattr(model, batch_arg_name) and not hasattr(model.hparams, batch_arg_name):
+            raise MisconfigurationException(
+                f'Field {batch_arg_name} not found in either `model` or `model.hparams`')
+        if hasattr(model, batch_arg_name) and hasattr(model.hparams, batch_arg_name):
+            rank_zero_warn(
+                f'Field `model.{batch_arg_name}` and `model.hparams.{batch_arg_name}` are mutually exclusive!'
+                f' `model.{batch_arg_name}` will be used as the initial batch size for scaling.'
+                ' If this is not the intended behavior, please remove either one.')

         if hasattr(model.train_dataloader, 'patch_loader_code'):
             raise MisconfigurationException('The batch scaling feature cannot be used with dataloaders'
@@ -242,17 +249,35 @@ def _adjust_batch_size(trainer,

     """
     model = trainer.get_model()
-    batch_size = getattr(model, batch_arg_name)
+    if hasattr(model, batch_arg_name):
+        batch_size = getattr(model, batch_arg_name)
+    else:
+        if isinstance(model.hparams, dict):
+            batch_size = model.hparams[batch_arg_name]
+        else:
+            batch_size = getattr(model.hparams, batch_arg_name)
     if value:
-        setattr(model, batch_arg_name, value)
+        if hasattr(model, batch_arg_name):
+            setattr(model, batch_arg_name, value)
+        else:
+            if isinstance(model.hparams, dict):
+                model.hparams[batch_arg_name] = value
+            else:
+                setattr(model.hparams, batch_arg_name, value)
         new_size = value
         if desc:
             log.info(f'Batch size {batch_size} {desc}, trying batch size {new_size}')
     else:
         new_size = int(batch_size * factor)
         if desc:
             log.info(f'Batch size {batch_size} {desc}, trying batch size {new_size}')
-    setattr(model, batch_arg_name, new_size)
+    if hasattr(model, batch_arg_name):
+        setattr(model, batch_arg_name, new_size)
+    else:
+        if isinstance(model.hparams, dict):
+            model.hparams[batch_arg_name] = new_size
+        else:
+            setattr(model.hparams, batch_arg_name, new_size)
     return new_size
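
The repeated lookup and assignment logic in `_adjust_batch_size` boils down to one pattern: prefer a direct attribute on the model, otherwise fall back to `model.hparams`, which may be a plain dict or a namespace-like object. A minimal standalone sketch of that pattern follows; the helper names `_get_batch_arg` and `_set_batch_arg` are illustrative only and not part of this PR.

def _get_batch_arg(model, batch_arg_name: str):
    # A direct attribute on the model wins; otherwise read from model.hparams,
    # which may be a plain dict or an argparse.Namespace-like object.
    if hasattr(model, batch_arg_name):
        return getattr(model, batch_arg_name)
    if isinstance(model.hparams, dict):
        return model.hparams[batch_arg_name]
    return getattr(model.hparams, batch_arg_name)


def _set_batch_arg(model, batch_arg_name: str, value) -> None:
    # Write the new value back to wherever the field actually lives.
    if hasattr(model, batch_arg_name):
        setattr(model, batch_arg_name, value)
    elif isinstance(model.hparams, dict):
        model.hparams[batch_arg_name] = value
    else:
        setattr(model.hparams, batch_arg_name, value)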


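For context, a usage sketch of the behavior this PR targets: a LightningModule that keeps `batch_size` only inside `hparams` should now work with the batch-size finder instead of raising a MisconfigurationException. The toy module, data, and hyperparameter values below are placeholders, not taken from the PR.

import torch
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl


class ToyModel(pl.LightningModule):
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams          # batch_size lives only in hparams, not as model.batch_size
        self.layer = torch.nn.Linear(32, 2)

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = torch.nn.functional.cross_entropy(self(x), y)
        return {'loss': loss}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams['lr'])

    def train_dataloader(self):
        ds = TensorDataset(torch.randn(512, 32), torch.randint(0, 2, (512,)))
        return DataLoader(ds, batch_size=self.hparams['batch_size'])


model = ToyModel({'batch_size': 16, 'lr': 1e-3})
trainer = pl.Trainer(max_epochs=1)
# With this fix, the finder reads and writes model.hparams['batch_size'];
# previously it required a model.batch_size attribute and raised otherwise.
new_size = trainer.scale_batch_size(model, mode='power')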