-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Open
Labels
Description
I looked briefly at the issues, and it seems the problem occurs with other methods as well. I'm using a crop size of (1024, 1024) and scale = (2048, 1024), all divisible by 32.
The error occurs at evaluation and inference:
[/content/mmsegmentation/mmseg/models/backbones/ddrnet.py](https://localhost:8080/#) in forward(self, x)
191 comp_c = self.compression_1(self.relu(x_c))
192 x_c += self.down_1(self.relu(x_s))
--> 193 x_s += resize(
194 comp_c,
195 size=out_size,
RuntimeError: The size of tensor a (182) must match the size of tensor b (181) at non-singleton dimension 3
Here's the config:
# ---------------------------------------------------------------------------
# Shared hyper-parameters, referenced throughout the rest of this config.
# ---------------------------------------------------------------------------
# Dataset
data_root = 'dataxseg'
dataset_type = 'TextDet1Dataset'

# Resize / crop geometry
crop_size = (832, 832)
scale = (2048, 832)
test_scale = (1024, 1024)

# Schedule
max_iters = 6000
val_interval = 500
save_interval = 500
load_from = None

# Optimisation / dataloading
batch_size = 16
num_workers = 2
startlr = 0.0005
min_lr = 1e-06

# Per-class loss weighting (two classes).
class_weight = [0.8, 1.1]

# Pretrained DDRNet weights (ImageNet-1k pretraining, per the filename).
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/pretrain/ddrnet23s-in1kpre_3rdparty-1ccac5b1.pth'  # noqa
data_preprocessor = dict(
    type='SegDataPreProcessor',
    size=crop_size,
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    # FIX: pad inference inputs to a multiple of 32.  The traceback above
    # ("The size of tensor a (182) must match the size of tensor b (181)"
    # in ddrnet.py) occurs when keep_ratio resizing at test time produces
    # H/W not divisible by the backbone's stride, so the two DDRNet branch
    # feature maps disagree by one pixel.  SegDataPreProcessor's `test_cfg`
    # pads batches at predict time to avoid this.
    test_cfg=dict(size_divisor=32))
# SyncBN, shared by backbone and decode head below.
norm_cfg = dict(type='SyncBN', requires_grad=True)
# EncoderDecoder: DDRNet backbone + DDRHead, whole-image inference.
# Two OhemCrossEntropy terms are applied with loss weights 1.0 and 0.4.
# `_ohem` holds the shared OHEM settings so the two loss entries cannot
# silently drift apart (the underscore keeps it out of the dumped config).
_ohem = dict(
    type='OhemCrossEntropy',
    thres=0.9,
    min_kept=131072,
    class_weight=class_weight)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='DDRNet',
        in_channels=3,
        channels=32,
        ppm_channels=128,
        norm_cfg=norm_cfg,
        align_corners=False,
        init_cfg=dict(type='Pretrained', checkpoint=checkpoint)),
    decode_head=dict(
        type='DDRHead',
        in_channels=32 * 4,
        channels=64,
        dropout_ratio=0.,
        num_classes=2,
        align_corners=False,
        norm_cfg=norm_cfg,
        loss_decode=[
            dict(loss_weight=1.0, **_ohem),
            dict(loss_weight=0.4, **_ohem),
        ]),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
# NOTE: this partial train_dataloader is overridden by the full definition
# later in this file; only that later assignment takes effect.
train_dataloader = dict(batch_size=batch_size, num_workers=num_workers)

# Optimizer: AdamW.  (Alternative kept from the original file:
# optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005))
optimizer = dict(type='AdamW', lr=startlr, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# Learning-rate policy: polynomial decay of the optimizer LR down to
# `min_lr` over the full run, stepped per iteration (by_epoch=False).
param_scheduler = [
    dict(
        type='PolyLR',
        begin=0,
        end=max_iters,
        power=0.9,
        eta_min=min_lr,
        by_epoch=False),
]
# Loop configuration: iteration-based training for `max_iters` iterations
# (6000 here — the original "120k" comment was stale) with periodic
# validation every `val_interval` iterations.
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=max_iters, val_interval=val_interval)
val_cfg = dict(type='ValLoop')
# The original file assigned `test_cfg` twice with the same value; a single
# assignment is equivalent.
test_cfg = dict(type='TestLoop')

default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(
        type='CheckpointHook', by_epoch=False, interval=save_interval),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))

# Fixed seed for reproducible runs.
randomness = dict(seed=304)
# Test-time pipeline: single keep-ratio resize to `test_scale`.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=test_scale, keep_ratio=True),
    dict(type='LoadAnnotations', reduce_zero_label=False),
    dict(type='PackSegInputs'),
]
# FIX: the dataloader previously embedded its own inline pipeline that
# resized with the *training* `scale` while `test_pipeline` (using
# `test_scale`) was defined but never referenced.  Reference
# `test_pipeline` so the declared test scale is actually applied.
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
# `train_cfg` here repeats the earlier assignment with identical values;
# the last assignment wins.
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=max_iters, val_interval=val_interval)

# Full training dataloader: infinite sampling with random resize/crop/flip
# and photometric augmentation.
train_dataloader = dict(
    batch_size=batch_size,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training', seg_map_path='annotations/training'),
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', reduce_zero_label=False),
            dict(
                type='RandomResize',
                scale=scale,
                ratio_range=(0.5, 2.0),
                keep_ratio=True),
            dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
            dict(type='RandomFlip', prob=0.5),
            dict(type='PhotoMetricDistortion'),
            dict(type='PackSegInputs'),
        ]))
# Standalone copy of the training pipeline.  The train_dataloader above
# embeds its own identical inline copy and does not reference this name.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=False),
    dict(
        type='RandomResize',
        scale=scale,
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]
# Test-time augmentation: 6 resize factors x 2 horizontal-flip states.
tta_model = dict(type='SegTTAModel')
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            # One Resize branch per scale factor.
            [
                dict(type='Resize', scale_factor=factor, keep_ratio=True)
                for factor in (0.5, 0.75, 1.0, 1.25, 1.5, 1.75)
            ],
            # prob=0.0 keeps the original orientation; prob=1.0 always flips.
            [
                dict(type='RandomFlip', prob=0.0, direction='horizontal'),
                dict(type='RandomFlip', prob=1.0, direction='horizontal'),
            ],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ]),
]
# Validation: single keep-ratio resize to `scale`, evaluated with mIoU.
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='Resize', scale=scale, keep_ratio=True),
            dict(type='LoadAnnotations', reduce_zero_label=False),
            dict(type='PackSegInputs'),
        ]))
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
# Matches the scale factors used in tta_pipeline; not referenced within
# this config snippet itself.
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]

# Runtime environment and logging.
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(by_epoch=False)
log_level = 'INFO'
resume = False
# `tta_model` is also assigned earlier in this file with the same value.
tta_model = dict(type='SegTTAModel')