You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
How to resolve "RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)"?
#38
Open
code1007 opened this issue
Nov 30, 2023
· 1 comment
Traceback (most recent call last):
File "train.py", line 91, in <module>
main(cfg)
File "train.py", line 70, in main
trainer.fit(model = model, datamodule = dm)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 514, in fit
self.dispatch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 554, in dispatch
self.accelerator.start_training(self)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 74, in start_training
self.training_type_plugin.start_training(trainer)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 111, in start_training
self._results = trainer.run_train()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 645, in run_train
self.train_loop.run_training_epoch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 493, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 632, in run_training_batch
split_batch, batch_idx, opt_idx, optimizer, self.trainer.hiddens
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 743, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 293, in training_step
training_step_output = self.trainer.accelerator.training_step(args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 157, in training_step
return self.training_type_plugin.training_step(*args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 122, in training_step
return self.lightning_module.training_step(*args, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/model_interface.py", line 81, in training_step
results_dict = self.model(data=data, label=label)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 77, in forward
h = self.layer1(h) #[B, N, 512]
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 24, in forward
x = x + self.attn(self.norm(x))
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/nystrom_attention/nystrom_attention.py", line 82, in forward
q, k, v = self.to_qkv(x).chunk(3, dim = -1)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)
The text was updated successfully, but these errors were encountered:
Traceback (most recent call last):
File "train.py", line 91, in <module>
main(cfg)
File "train.py", line 70, in main
trainer.fit(model = model, datamodule = dm)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 514, in fit
self.dispatch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 554, in dispatch
self.accelerator.start_training(self)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 74, in start_training
self.training_type_plugin.start_training(trainer)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 111, in start_training
self._results = trainer.run_train()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 645, in run_train
self.train_loop.run_training_epoch()
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 493, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 632, in run_training_batch
split_batch, batch_idx, opt_idx, optimizer, self.trainer.hiddens
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 743, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 293, in training_step
training_step_output = self.trainer.accelerator.training_step(args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 157, in training_step
return self.training_type_plugin.training_step(*args)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 122, in training_step
return self.lightning_module.training_step(*args, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/model_interface.py", line 81, in training_step
results_dict = self.model(data=data, label=label)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 77, in forward
h = self.layer1(h) #[B, N, 512]
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/sharefiles1/boqiuhan/TransMIL-main/TransMIL-main/models/TransMIL.py", line 24, in forward
x = x + self.attn(self.norm(x))
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/nystrom_attention/nystrom_attention.py", line 82, in forward
q, k, v = self.to_qkv(x).chunk(3, dim = -1)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/boqiuhan/anaconda3/envs/transmil_new/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling cublasGemmEx( handle, opa, opb, m, n, k, &falpha, a, CUDA_R_16F, lda, b, CUDA_R_16F, ldb, &fbeta, c, CUDA_R_16F, ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)
The text was updated successfully, but these errors were encountered: