RuntimeError: Expected canUse32BitIndexMath(input) && canUse32BitIndexMath(output) to be true #2598
Unanswered
qingtong33
asked this question in
Q&A
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
I customized RepLKNet as a backbone in mmaction2. It works well when I combine it with TSN, but when I try to combine RepLKNet with TSM, it fails. The new network trains fine through the first epoch, but an error is raised when evaluation begins:
Traceback (most recent call last):
File "./tools/train.py", line 222, in <module>
main()
File "./tools/train.py", line 218, in main
meta=meta)
File "/home/shelter/shelterX/wrl_code/mmaction2/mmaction/apis/train.py", line 232, in train_model
runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 136, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 58, in train
self.call_hook('after_train_epoch')
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/runner/base_runner.py", line 317, in call_hook
getattr(hook, fn_name)(self)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/runner/hooks/evaluation.py", line 271, in after_train_epoch
self._do_evaluate(runner)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/runner/hooks/evaluation.py", line 507, in _do_evaluate
gpu_collect=self.gpu_collect)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/engine/test.py", line 78, in multi_gpu_test
result = model(return_loss=False, **data)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 1040, in forward
output = self._run_ddp_forward(*inputs, **kwargs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/parallel/distributed.py", line 165, in _run_ddp_forward
return module_to_run(*inputs[0], **kwargs[0]) # type: ignore
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/shelter/shelterX/wrl_code/mmaction2/mmaction/models/recognizers/base.py", line 271, in forward
return self.forward_test(imgs, **kwargs)
File "/home/shelter/shelterX/wrl_code/mmaction2/mmaction/models/recognizers/recognizer2d.py", line 148, in forward_test
return self._do_test(imgs).cpu().numpy()
File "/home/shelter/shelterX/wrl_code/mmaction2/mmaction/models/recognizers/recognizer2d.py", line 57, in _do_test
x = self.extract_feat(imgs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/mmcv/runner/fp16_utils.py", line 119, in new_func
return old_func(*args, **kwargs)
File "/home/shelter/shelterX/wrl_code/mmaction2/mmaction/models/recognizers/base.py", line 170, in extract_feat
x = self.backbone(imgs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/shelter/shelterX/wrl_code/mmaction2/mmaction/models/backbones/replknet_tsm_v2.py", line 513, in forward
x = self.forward_features(x)
File "/home/shelter/shelterX/wrl_code/mmaction2/mmaction/models/backbones/replknet_tsm_v2.py", line 490, in forward_features
x = checkpoint.checkpoint(stem_layer, x) # save memory
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/container.py", line 204, in forward
input = module(input)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/shelter/anaconda3/envs/mmlab/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 460, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected canUse32BitIndexMath(input) && canUse32BitIndexMath(output) to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
I want to know if this is due to my config file, or how I can resolve this error.
This is my config file:
model = dict(
type='Recognizer2D',
backbone=dict(
type='RepLKNetTSMV2',
pretrained='./checkpoints/RepLKNet-31B_ImageNet-22K-to-1K_224.pth',
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[128, 256, 512, 1024],
drop_path_rate=0.5,
small_kernel=5,
num_classes=20,
use_checkpoint=True,
small_kernel_merged=False,
frozen_stages=-1,
shift_div=8,
num_segments=3,
is_shift=True),
cls_head=dict(
type='TSMHead',
num_segments=3,
num_classes=20,
in_channels=1024,
spatial_type='avg',
consensus=dict(type='AvgConsensus', dim=1),
dropout_ratio=0.0,
init_std=0.001,
is_shift=True),
# model training and testing settings
train_cfg=None,
test_cfg=dict(average_clips='prob'))
# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/minikinetics20_train_list_videos.txt'
ann_file_val = 'data/kinetics400/minikinetics20_val_list_videos.txt'
ann_file_test = 'data/kinetics400/minikinetics20_val_list_videos.txt'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
dict(type='DecordInit'),
dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=3),
dict(type='DecordDecode'),
dict(
type='MultiScaleCrop',
input_size=224,
scales=(1, 0.875, 0.75, 0.66),
random_crop=False,
max_wh_scale_gap=1),
dict(type='Resize', scale=(224, 224), keep_ratio=False),
dict(type='Flip', flip_ratio=0.5),
dict(
type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
dict(type='DecordInit'),
dict(
type='DenseSampleFrames',
clip_len=1,
frame_interval=1,
num_clips=3,
test_mode=True),
dict(type='DecordDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='CenterCrop', crop_size=224),
dict(
type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
dict(type='DecordInit', num_threads=1),
dict(
type='DenseSampleFrames',
clip_len=1,
frame_interval=1,
num_clips=8,
test_mode=True),
dict(type='DecordDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='TenCrop', crop_size=224),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
data = dict(
videos_per_gpu=64,
workers_per_gpu=16,
train=dict(
type=dataset_type,
ann_file=ann_file_train,
data_prefix=data_root,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=val_pipeline),
test=dict(
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=test_pipeline)
)
checkpoint_config = dict(interval=5)
log_config = dict(
interval=20,
hooks=[dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None #'work_dirs/replknet2d_tsrelplknet_video_minikinetics20_rgb_sdg/latest.pth'
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
evaluation = dict(
interval=1, metrics=['top_k_accuracy', 'mean_class_accuracy'])
optimizer = dict(type='SGD', lr=0.008, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
lr_config = dict(policy='step', step=[40, 60])
total_epochs = 80
work_dir = './work_dirs/replknet2d_relplknetTSM_video_minikinetics20_rgb_sdg/'
gpu_ids = 0
omnisource = False
module_hooks = []
Beta Was this translation helpful? Give feedback.
All reactions