You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
2024-06-20 09:55:25,806 INFO [train.py:1256] (4/8) Saving batch to exp/pts7_aatmd4k_ctc_bs/batch-c33f4584-b23b-c1d8-493c-d01609de8895.pt
2024-06-20 09:55:25,868 INFO [train.py:1262] (4/8) features shape: torch.Size([157, 477, 80])
2024-06-20 09:55:25,870 INFO [train.py:1267] (4/8) num tokens: 2872
Traceback (most recent call last):
File "./pruned_transducer_stateless7_ctc_bs/train.py", line 1335, in <module>
main()
File "./pruned_transducer_stateless7_ctc_bs/train.py", line 1326, in main
mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True)
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 4 terminated with the following error:
Traceback (most recent call last):
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/train.py", line 1199, in run
train_one_epoch(
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/train.py", line 844, in train_one_epoch
loss, loss_info = compute_loss(
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/train.py", line 681, in compute_loss
simple_loss, pruned_loss, ctc_output = model(
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 963, in forward
output = self.module(*inputs[0], **kwargs[0])
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/model.py", line 143, in forward
encoder_out = self.lconv(
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/lconv.py", line 103, in forward
x = x.masked_fill(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
RuntimeError: The expanded size of the tensor (118) must match the existing size (117) at non-singleton dimension 2. Target sizes: [157, 768, 118]. Tensor sizes: [157, 1, 117]
2024-06-20 09:55:25,806 INFO [train.py:1256] (4/8) Saving batch to exp/pts7_aatmd4k_ctc_bs/batch-c33f4584-b23b-c1d8-493c-d01609de8895.pt
2024-06-20 09:55:25,868 INFO [train.py:1262] (4/8) features shape: torch.Size([157, 477, 80])
2024-06-20 09:55:25,870 INFO [train.py:1267] (4/8) num tokens: 2872
Traceback (most recent call last):
File "./pruned_transducer_stateless7_ctc_bs/train.py", line 1335, in <module>
main()
File "./pruned_transducer_stateless7_ctc_bs/train.py", line 1326, in main
mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True)
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 4 terminated with the following error:
Traceback (most recent call last):
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/train.py", line 1199, in run
train_one_epoch(
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/train.py", line 844, in train_one_epoch
loss, loss_info = compute_loss(
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/train.py", line 681, in compute_loss
simple_loss, pruned_loss, ctc_output = model(
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 963, in forward
output = self.module(*inputs[0], **kwargs[0])
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/model.py", line 143, in forward
encoder_out = self.lconv(
File "/home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/my-asr/kell/backstreet/icefall-master/egs/blueberry/ASR/pruned_transducer_stateless7_ctc_bs/lconv.py", line 103, in forward
x = x.masked_fill(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
RuntimeError: The expanded size of the tensor (118) must match the existing size (117) at non-singleton dimension 2. Target sizes: [157, 768, 118]. Tensor sizes: [157, 1, 117]
python -m k2.version
Collecting environment information...
k2 version: 1.24.4
Build type: Release
Git SHA1: f6919c0ddb311bea7b53a50f3afdcb3c18b8ccc8
Git date: Sat Feb 10 09:23:09 2024
Cuda used to build k2: 11.3
cuDNN used to build k2: 8.3.2
Python version used to build k2: 3.8
OS used to build k2: CentOS Linux release 7.9.2009 (Core)
CMake version: 3.28.1
GCC version: 9.3.1
CMAKE_CUDA_FLAGS: -Wno-deprecated-gpu-targets -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_50,code=sm_50 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_60,code=sm_60 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_61,code=sm_61 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_70,code=sm_70 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_75,code=sm_75 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_80,code=sm_80 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_86,code=sm_86 -DONNX_NAMESPACE=onnx_c2 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=integer_sign_change,--diag_suppress=useless_using_declaration,--diag_suppress=set_but_not_used,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=implicit_return_from_non_void_function,--diag_suppress=unsigned_compare_with_zero,--diag_suppress=declared_but_not_referenced,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -D_GLIBCXX_USE_CXX11_ABI=0 
--compiler-options -Wall --compiler-options -Wno-strict-overflow --compiler-options -Wno-unknown-pragmas
CMAKE_CXX_FLAGS: -D_GLIBCXX_USE_CXX11_ABI=0 -Wno-unused-variable -Wno-strict-overflow
PyTorch version used to build k2: 1.11.0+cu113
PyTorch is using Cuda: 11.3
NVTX enabled: True
With CUDA: True
Disable debug: True
Sync kernels : False
Disable checks: False
Max cpu memory allocate: 214748364800 bytes (or 200.0 GB)
k2 abort: False
file: /home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/k2/version/version.py
_k2.file: /home/kell/anaconda3/envs/icefall/lib/python3.8/site-packages/_k2.cpython-38-x86_64-linux-gnu.so
The text was updated successfully, but these errors were encountered: