Skip to content

CUDA error: an illegal memory access was encountered #42

Closed
@rassabin

Description

@rassabin
sys.platform: linux
Python: 3.7.7 (default, May  7 2020, 21:25:33) [GCC 7.3.0]
CUDA available: True
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 10.0, V10.0.130
GPU 0,1: GeForce GTX 1080 Ti
GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
PyTorch: 1.4.0
PyTorch compiling details: PyTorch built with:
  - GCC 7.3
  - Intel(R) Math Kernel Library Version 2020.0.1 Product Build 20200208 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v0.21.1 (Git Hash 7d2fd500bc78936d1d648ca713b901012f470dbc)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CUDA Runtime 10.0
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37
  - CuDNN 7.6.3
  - Magma 2.5.1
  - Build settings: BLAS=MKL, BUILD_NAMEDTENSOR=OFF, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -fopenmp -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Wno-stringop-overflow, DISABLE_NUMA=1, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_STATIC_DISPATCH=OFF, 

TorchVision: 0.5.0
OpenCV: 4.2.0
MMCV: 1.0.4
MMSegmentation: 0.5.0+b57fb2b
MMCV Compiler: GCC 7.5
MMCV CUDA Compiler: 10.0

The error was encountered during the training process with the following config:

Config:
norm_cfg = dict(type='BN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=9,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=9,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
train_cfg = dict()
test_cfg = dict(mode='whole')
dataset_type = 'Aircraft'
data_root = '/mmdetection_aircraft/data/segm2/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(640, 480), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(640, 480),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=1,
    train=dict(
        type='Aircraft',
        data_root='/mmdetection_aircraft/data/segm2/',
        img_dir='JPEGImages',
        ann_dir='PaletteClass',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='Resize', img_scale=(640, 480), ratio_range=(0.5, 2.0)),
            dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(type='PhotoMetricDistortion'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ],
        split='train.txt'),
    val=dict(
        type='Aircraft',
        data_root='/mmdetection_aircraft/data/segm2/',
        img_dir='JPEGImages',
        ann_dir='PaletteClass',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(640, 480),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ],
        split='val.txt'),
    test=dict(
        type='Aircraft',
        data_root='/mmdetection_aircraft/data/segm2/',
        img_dir='JPEGImages',
        ann_dir='PaletteClass',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(640, 480),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ],
        split='val.txt'))
log_config = dict(
    interval=1, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0001, by_epoch=False)
total_iters = 400
checkpoint_config = dict(by_epoch=False, interval=200)
evaluation = dict(interval=1, metric='mIoU')
work_dir = './work_dirs/pspnet'
seed = 0
gpu_ids = [1]

The script takes approximately 4-5 GB of GPU memory out of the 11 GB available and then fails with this error:

#ERROR

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-8-fec2661e1f4c> in <module>
     16 mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
     17 train_segmentor(model, datasets, cfg, distributed=False, validate=True, 
---> 18                 meta=dict())

~/mmsegmentation/mmseg/apis/train.py in train_segmentor(model, dataset, cfg, distributed, validate, timestamp, meta)
    104     elif cfg.load_from:
    105         runner.load_checkpoint(cfg.load_from)
--> 106     runner.run(data_loaders, cfg.workflow, cfg.total_iters)

~/miniconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py in run(self, data_loaders, workflow, max_iters, **kwargs)
    117                     if mode == 'train' and self.iter >= max_iters:
    118                         break
--> 119                     iter_runner(iter_loaders[i], **kwargs)
    120 
    121         time.sleep(1)  # wait for some hooks like loggers to finish

~/miniconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py in train(self, data_loader, **kwargs)
     53         self.call_hook('before_train_iter')
     54         data_batch = next(data_loader)
---> 55         outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)
     56         if not isinstance(outputs, dict):
     57             raise TypeError('model.train_step() must return a dict')

~/miniconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/parallel/data_parallel.py in train_step(self, *inputs, **kwargs)
     29 
     30         inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
---> 31         return self.module.train_step(*inputs[0], **kwargs[0])
     32 
     33     def val_step(self, *inputs, **kwargs):

~/mmsegmentation/mmseg/models/segmentors/base.py in train_step(self, data_batch, optimizer, **kwargs)
    150         #data_batch['gt_semantic_seg'] = data_batch['gt_semantic_seg'][:,:,:,:,0]
    151         #print(data_batch['gt_semantic_seg'].shape)
--> 152         losses = self.forward_train(**data_batch, **kwargs)
    153         loss, log_vars = self._parse_losses(losses)
    154 

~/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py in forward_train(self, img, img_metas, gt_semantic_seg)
    155 
    156         loss_decode = self._decode_head_forward_train(x, img_metas,
--> 157                                                       gt_semantic_seg)
    158         losses.update(loss_decode)
    159 

~/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py in _decode_head_forward_train(self, x, img_metas, gt_semantic_seg)
     99         loss_decode = self.decode_head.forward_train(x, img_metas,
    100                                                      gt_semantic_seg,
--> 101                                                      self.train_cfg)
    102 
    103         losses.update(add_prefix(loss_decode, 'decode'))

~/mmsegmentation/mmseg/models/decode_heads/decode_head.py in forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg)
    184         """
    185         seg_logits = self.forward(inputs)
--> 186         losses = self.losses(seg_logits, gt_semantic_seg)
    187         return losses
    188 

~/miniconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/runner/fp16_utils.py in new_func(*args, **kwargs)
    162                                 'method of nn.Module')
    163             if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
--> 164                 return old_func(*args, **kwargs)
    165             # get the arg spec of the decorated method
    166             args_info = getfullargspec(old_func)

~/mmsegmentation/mmseg/models/decode_heads/decode_head.py in losses(self, seg_logit, seg_label)
    229             seg_label,
    230             weight=seg_weight,
--> 231             ignore_index=self.ignore_index)
    232         loss['acc_seg'] = accuracy(seg_logit, seg_label)
    233         return loss

~/miniconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~/mmsegmentation/mmseg/models/losses/cross_entropy_loss.py in forward(self, cls_score, label, weight, avg_factor, reduction_override, **kwargs)
    175             class_weight=class_weight,
    176             reduction=reduction,
--> 177             avg_factor=avg_factor)
    178         return loss_cls

~/mmsegmentation/mmseg/models/losses/cross_entropy_loss.py in cross_entropy(pred, label, weight, class_weight, reduction, avg_factor, ignore_index)
     28         weight = weight.float()
     29     loss = weight_reduce_loss(
---> 30         loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
     31 
     32     return loss

~/mmsegmentation/mmseg/models/losses/utils.py in weight_reduce_loss(loss, weight, reduction, avg_factor)
     45     # if avg_factor is not specified, just reduce the loss
     46     if avg_factor is None:
---> 47         loss = reduce_loss(loss, reduction)
     48     else:
     49         # if reduction is mean, then average the loss by avg_factor

~/mmsegmentation/mmseg/models/losses/utils.py in reduce_loss(loss, reduction)
     19         return loss
     20     elif reduction_enum == 1:
---> 21         return loss.mean()
     22     elif reduction_enum == 2:
     23         return loss.sum()

RuntimeError: CUDA error: an illegal memory access was encountered

But if I halve the image size while keeping the same number of images per GPU (2), the script takes approximately 2 GB of GPU memory and everything works fine.
Also, I want to add that, using another PyTorch script with my own DataLoader, I am able to fill the GPU completely (11 GB) during training with the same Torch version and the same hardware.

Metadata

Metadata

Assignees

Labels

documentation: Improvements or additions to documentation

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions