
The parameter loc has invalid values #1020

Closed
farnoush-shh opened this issue Apr 7, 2021 · 4 comments

Comments

@farnoush-shh
lvae = scvi.model.SCANVI(adata, "Unknown", n_latent=30, n_layers=2)
lvae.train(max_epochs=100, n_samples_per_label=100)
INFO Training for 100 epochs.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.
Epoch 52/100: 51%|█████ | 51/100 [1:59:47<1:55:05, 140.94s/it, loss=565, v_num=1]


ValueError Traceback (most recent call last)
<ipython-input> in <module>
----> 1 lvae.train(max_epochs=100, n_samples_per_label=100)

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/model/_scanvi.py in train(self, max_epochs, n_samples_per_label, check_val_every_n_epoch, train_size, validation_size, batch_size, use_gpu, plan_kwargs, **trainer_kwargs)
376 **trainer_kwargs,
377 )
--> 378 return runner()

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/train/trainrunner.py in __call__(self)
73 self.trainer.fit(self.training_plan, train_dl)
74 else:
---> 75 self.trainer.fit(self.training_plan, train_dl, val_dl)
76 try:
77 self.model.history_ = self.trainer.logger.history

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/train/_trainer.py in fit(self, *args, **kwargs)
150 message="you defined a validation_step but have no val_dataloader",
151 )
--> 152 super().fit(*args, **kwargs)

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
497
498 # dispath start_training or start_testing or start_predicting
--> 499 self.dispatch()
500
501 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
544
545 else:
--> 546 self.accelerator.start_training(self)
547
548 def train_or_test_or_predict(self):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
71
72 def start_training(self, trainer):
---> 73 self.training_type_plugin.start_training(trainer)
74
75 def start_testing(self, trainer):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
112 def start_training(self, trainer: 'Trainer') -> None:
113 # double dispatch to initiate the training loop
--> 114 self._results = trainer.run_train()
115
116 def start_testing(self, trainer: 'Trainer') -> None:

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
635 with self.profiler.profile("run_training_epoch"):
636 # run train epoch
--> 637 self.train_loop.run_training_epoch()
638
639 if self.max_steps and self.max_steps <= self.global_step:

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_epoch(self)
491 # ------------------------------------
492 with self.trainer.profiler.profile("run_training_batch"):
--> 493 batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
494
495 # when returning -1 from train_step, we end epoch early

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_batch(self, batch, batch_idx, dataloader_idx)
653
654 # optimizer step
--> 655 self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
656
657 else:

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
432 on_tpu=self.trainer._device_type == DeviceType.TPU and _TPU_AVAILABLE,
433 using_native_amp=using_native_amp,
--> 434 using_lbfgs=is_lbfgs,
435 )
436

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py in optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure, on_tpu, using_native_amp, using_lbfgs)
1385 # wraps into LightingOptimizer only for running step
1386 optimizer = LightningOptimizer._to_lightning_optimizer(optimizer, self.trainer, optimizer_idx)
-> 1387 optimizer.step(closure=optimizer_closure)
1388
1389 def optimizer_zero_grad(self, epoch: int, batch_idx: int, optimizer: Optimizer, optimizer_idx: int):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py in step(self, closure, *args, **kwargs)
212 profiler_name = f"optimizer_step_and_closure_{self._optimizer_idx}"
213
--> 214 self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
215 self._total_optimizer_step_calls += 1
216

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py in __optimizer_step(self, closure, profiler_name, **kwargs)
132
133 with trainer.profiler.profile(profiler_name):
--> 134 trainer.accelerator.optimizer_step(optimizer, self._optimizer_idx, lambda_closure=closure, **kwargs)
135
136 def step(self, *args, closure: Optional[Callable] = None, **kwargs):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py in optimizer_step(self, optimizer, opt_idx, lambda_closure, **kwargs)
275 )
276 if make_optimizer_step:
--> 277 self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs)
278 self.precision_plugin.post_optimizer_step(optimizer, opt_idx)
279 self.training_type_plugin.post_optimizer_step(optimizer, opt_idx, **kwargs)

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py in run_optimizer_step(self, optimizer, optimizer_idx, lambda_closure, **kwargs)
280
281 def run_optimizer_step(self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs):
--> 282 self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs)
283
284 def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: Optimizer, opt_idx: int) -> None:

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in optimizer_step(self, optimizer, lambda_closure, **kwargs)
161
162 def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs):
--> 163 optimizer.step(closure=lambda_closure, **kwargs)

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/torch/optim/optimizer.py in wrapper(*args, **kwargs)
87 profile_name = "Optimizer.step#{}.step".format(obj.__class__.__name__)
88 with torch.autograd.profiler.record_function(profile_name):
---> 89 return func(*args, **kwargs)
90 return wrapper
91

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/torch/autograd/grad_mode.py in decorate_context(*args, **kwargs)
25 def decorate_context(*args, **kwargs):
26 with self.__class__():
---> 27 return func(*args, **kwargs)
28 return cast(F, decorate_context)
29

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/torch/optim/adam.py in step(self, closure)
64 if closure is not None:
65 with torch.enable_grad():
---> 66 loss = closure()
67
68 for group in self.param_groups:

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in train_step_and_backward_closure()
648 def train_step_and_backward_closure():
649 result = self.training_step_and_backward(
--> 650 split_batch, batch_idx, opt_idx, optimizer, self.trainer.hiddens
651 )
652 return None if result is None else result.loss

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in training_step_and_backward(self, split_batch, batch_idx, opt_idx, optimizer, hiddens)
741 with self.trainer.profiler.profile("training_step_and_backward"):
742 # lightning module hook
--> 743 result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
744 self._curr_step_result = result
745

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py in training_step(self, split_batch, batch_idx, opt_idx, hiddens)
291 model_ref._results = Result()
292 with self.trainer.profiler.profile("training_step"):
--> 293 training_step_output = self.trainer.accelerator.training_step(args)
294 self.trainer.accelerator.post_training_step()
295

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py in training_step(self, args)
154
155 with self.precision_plugin.train_step_context(), self.training_type_plugin.train_step_context():
--> 156 return self.training_type_plugin.training_step(*args)
157
158 def post_training_step(self):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in training_step(self, *args, **kwargs)
123
124 def training_step(self, *args, **kwargs):
--> 125 return self.lightning_module.training_step(*args, **kwargs)
126
127 def post_training_step(self):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/train/_trainingplans.py in training_step(self, batch, batch_idx, optimizer_idx)
489 )
490 input_kwargs.update(self.loss_kwargs)
--> 491 _, _, scvi_losses = self.forward(full_dataset, loss_kwargs=input_kwargs)
492 loss = scvi_losses.loss
493 reconstruction_loss = scvi_losses.reconstruction_loss

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/train/_trainingplans.py in forward(self, *args, **kwargs)
103 def forward(self, *args, **kwargs):
104 """Passthrough to model.forward()."""
--> 105 return self.module(*args, **kwargs)
106
107 def training_step(self, batch, batch_idx, optimizer_idx=0):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/module/base/_decorators.py in auto_transfer_args(self, *args, **kwargs)
30 # decorator only necessary after training
31 if self.training:
---> 32 return fn(self, *args, **kwargs)
33
34 device = list(set(p.device for p in self.parameters()))

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/module/base/_base_module.py in forward(self, tensors, get_inference_input_kwargs, get_generative_input_kwargs, inference_kwargs, generative_kwargs, loss_kwargs, compute_loss)
131 tensors, **get_inference_input_kwargs
132 )
--> 133 inference_outputs = self.inference(**inference_inputs, **inference_kwargs)
134 generative_inputs = self._get_generative_input(
135 tensors, inference_outputs, **get_generative_input_kwargs

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/module/base/_decorators.py in auto_transfer_args(self, *args, **kwargs)
30 # decorator only necessary after training
31 if self.training:
---> 32 return fn(self, *args, **kwargs)
33
34 device = list(set(p.device for p in self.parameters()))

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/module/_vae.py in inference(self, x, batch_index, cont_covs, cat_covs, n_samples)
229 else:
230 categorical_input = tuple()
--> 231 qz_m, qz_v, z = self.z_encoder(encoder_input, batch_index, *categorical_input)
232 ql_m, ql_v, library_encoded = self.l_encoder(
233 encoder_input, batch_index, *categorical_input

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/nn/_base_components.py in forward(self, x, *cat_list)
284 q_m = self.mean_encoder(q)
285 q_v = torch.exp(self.var_encoder(q)) + self.var_eps
--> 286 latent = self.z_transformation(reparameterize_gaussian(q_m, q_v))
287 return q_m, q_v, latent
288

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/scvi/nn/_base_components.py in reparameterize_gaussian(mu, var)
11
12 def reparameterize_gaussian(mu, var):
---> 13 return Normal(mu, var.sqrt()).rsample()
14
15

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/torch/distributions/normal.py in __init__(self, loc, scale, validate_args)
48 else:
49 batch_shape = self.loc.size()
---> 50 super(Normal, self).__init__(batch_shape, validate_args=validate_args)
51
52 def expand(self, batch_shape, _instance=None):

~/anaconda3/envs/scvi-env/lib/python3.7/site-packages/torch/distributions/distribution.py in __init__(self, batch_shape, event_shape, validate_args)
51 continue # skip checking lazily-constructed args
52 if not constraint.check(getattr(self, param)).all():
---> 53 raise ValueError("The parameter {} has invalid values".format(param))
54 super(Distribution, self).__init__()
55

ValueError: The parameter loc has invalid values
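For context: torch.distributions validates its parameters when a distribution is constructed, and Normal's loc fails that check whenever it contains NaN, which typically means the loss or gradients became NaN partway through training. A minimal sketch that reproduces the same ValueError directly on the torch version in this traceback (the tensor values are invented for illustration; newer torch releases word the message slightly differently):

import torch
from torch.distributions import Normal

# A NaN mean, as a diverged encoder would produce
mu = torch.tensor([0.0, float("nan")])
var = torch.tensor([1.0, 1.0])

# Mirrors scvi's reparameterize_gaussian(mu, var) from the last frame above;
# raises ValueError: The parameter loc has invalid values
Normal(mu, var.sqrt(), validate_args=True).rsample()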


@321356766

Hi all,

Just adding to this thread: I have received the same error in a totalVI workflow. Any tips for things to look for during troubleshooting?

@adamgayoso
Member

These issues are likely due to some property of the data being used. It would be helpful to learn more about the data, and if you could post the scripts of your workflows.
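A few quick sanity checks along those lines (a hedged sketch, assuming the raw counts live in adata.X; adjust if setup_anndata registered a layer instead):

import numpy as np
import scipy.sparse as sp

X = adata.X
vals = X.data if sp.issparse(X) else np.asarray(X)
vals = vals.astype(np.float64)  # so isnan/isinf work even on integer-typed matrices

print("NaNs:", bool(np.isnan(vals).any()))           # NaN entries propagate into the encoder
print("Infs:", bool(np.isinf(vals).any()))
print("negatives:", bool((vals < 0).any()))          # counts should be non-negative
print("non-integers:", bool(np.any(vals % 1 != 0)))  # normalized/log data is a common culprit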

@adamgayoso
Member

Closing this due to inactivity. For further assistance please feel free to post on our Discourse forum https://discourse.scvi-tools.org/

@QiangShiPKU

lvae.train(max_epochs=50, use_gpu=True) succeeds,
lvae.train(max_epochs=200, use_gpu=True) fails with the following output
INFO Training for 200 epochs. Epoch 56/200: 28%|██▊ | 55/200 [3:58:04<10:27:38, 259.72s/it, loss=515, v_num=1]
and error
ValueError: The parameter loc has invalid values.
So, should max_epochs be no more than 50?

In addition, a report of the same error in the SDV project may help.
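If the run only diverges late in training, as in the epoch-56 failure above, the usual fix is not capping max_epochs but stabilizing the optimization, for example by lowering the learning rate. A hedged sketch, assuming this scvi-tools version accepts an lr key in plan_kwargs (the train() signature in the traceback shows plan_kwargs is a supported argument):

lvae.train(
    max_epochs=200,
    use_gpu=True,
    plan_kwargs={"lr": 1e-4},  # below the usual 1e-3 default; smaller steps can avoid NaN losses
)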
