You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I used a custom dataset to finetune the model, and all losses were present and decreased normally. After 70 epochs, I conducted a test and the results showed that objects could be detected correctly. However, my evaluation metric has always been -1.000. What is the reason for this?
Here is my configuration:
`_base_ = ('../../third_party/mmyolo/configs/yolov8/'
'yolov8_l_syncbn_fast_8xb16-500e_coco.py')
custom_imports = dict(imports=['yolo_world'], allow_failed_imports=False)
I used a custom dataset to finetune the model, and all losses were present and decreased normally. After 70 epochs, I conducted a test and the results showed that objects could be detected correctly. However, my evaluation metric has always been -1.000. What is the reason for this?
Here is my configuration:
`_base_ = ('../../third_party/mmyolo/configs/yolov8/'
'yolov8_l_syncbn_fast_8xb16-500e_coco.py')
custom_imports = dict(imports=['yolo_world'], allow_failed_imports=False)
# hyper-parameters
# Single-class fine-tuning: the same class count is used for training and
# testing.
num_classes = 1
num_training_classes = 1
max_epochs = 500  # Maximum training epochs
close_mosaic_epochs = 30
save_epoch_intervals = 50
text_channels = 512
# `_base_` is the inherited base config exposed by MMEngine; the rendered
# issue text had dropped its underscores.
neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]
base_lr = 1e-3
weight_decay = 0.0005
train_batch_size_per_gpu = 1
load_from = r'D:\plant_detection\YOLO-World-master\weights\yolo_world_v2_l_clip_large_o365v1_goldg_pretrain_800ft-9df82e55.pth'  # 'pretrained_models/yolo_world_l_clip_t2i_bn_2e-3adamw_32xb16-100e_obj365v1_goldg_cc3mlite_train-ca93cd1f.pth'
# Alternative text model path, kept for reference only; it was a dead
# assignment immediately overwritten by the line below.
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = r'D:\plant_detection\YOLO-World-master\configs\clip-vit-base-patch32'
persistent_workers = False
# model settings
# Detector definition layered on top of the inherited YOLOv8 base model.
model = dict(
    type='YOLOWorldDetector',
    mm_neck=True,
    num_train_classes=num_training_classes,
    num_test_classes=num_classes,
    data_preprocessor=dict(type='YOLOWDetDataPreprocessor'),
    # `_delete_=True` discards the inherited backbone dict instead of
    # merging into it; the base image backbone is re-used via the
    # `{{_base_...}}` reference.
    backbone=dict(
        _delete_=True,
        type='MultiModalYOLOBackbone',
        image_model={{_base_.model.backbone}},
        text_model=dict(
            type='HuggingCLIPLanguageBackbone',
            model_name=text_model_name,
            frozen_modules=['all'])),
    neck=dict(
        type='YOLOWorldPAFPN',
        guide_channels=text_channels,
        embed_channels=neck_embed_channels,
        num_heads=neck_num_heads,
        block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv')),
    bbox_head=dict(
        type='YOLOWorldHead',
        head_module=dict(
            type='YOLOWorldHeadModule',
            use_bn_head=True,
            embed_dims=text_channels,
            num_classes=num_training_classes)),
    train_cfg=dict(assigner=dict(num_classes=num_training_classes)))
# dataset settings
# Transforms appended to the end of each training pipeline: load the text
# prompts, then pack the sample including the 'texts' meta key.
text_transform = [
    dict(type='RandomLoadText',
         num_neg_samples=(num_classes, num_classes),
         max_num_samples=num_training_classes,
         padding_to_max=True,
         padding_value=''),
    dict(type='mmdet.PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                    'flip_direction', 'texts'))
]
# Mosaic followed by a random affine; scale and pre-transform settings are
# taken from the inherited base config (`_base_`).
mosaic_affine_transform = [
    dict(type='MultiModalMosaic',
         img_scale=_base_.img_scale,
         pad_val=114.0,
         pre_transform=_base_.pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        max_aspect_ratio=100.,
        scaling_ratio_range=(1 - _base_.affine_scale,
                             1 + _base_.affine_scale),
        # img_scale is (width, height)
        border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
        border_val=(114, 114, 114))
]
# Stage-1 training pipeline. Every sub-list is unpacked with `*` so the
# pipeline stays a flat list of transform dicts; without the unpacking the
# pipeline would contain nested lists.
train_pipeline = [
    *_base_.pre_transform,
    *mosaic_affine_transform,
    dict(type='YOLOv5MultiModalMixUp',
         prob=_base_.mixup_prob,
         pre_transform=[*_base_.pre_transform, *mosaic_affine_transform]),
    # Drop the base pipeline's final transform and append the text-aware
    # loading/packing transforms instead.
    *_base_.last_transform[:-1],
    *text_transform,
]
# Stage-2 pipeline (used after the pipeline switch): base stage-2 pipeline
# minus its final transform, plus the text transforms.
train_pipeline_stage2 = [*_base_.train_pipeline_stage2[:-1], *text_transform]
# Training dataset: a COCO-format dataset wrapped in MultiModalDataset,
# which also receives the class-text JSON file.
# `_delete_=True` replaces the inherited dataset dict instead of merging.
coco_train_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        metainfo=dict(classes=['crop']),
        data_root=r'C:\Users\lenovo\Desktop\self_coco',
        ann_file=r'C:\Users\lenovo\Desktop\self_coco\train.json',
        data_prefix=dict(img=r'C:\Users\lenovo\Desktop\self_coco\train'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path=r'C:\Users\lenovo\Desktop\self_coco\coco_class_text.json',
    pipeline=train_pipeline)
train_dataloader = dict(
    persistent_workers=persistent_workers,
    batch_size=train_batch_size_per_gpu,
    collate_fn=dict(type='yolow_collate'),
    dataset=coco_train_dataset)
# Test pipeline: base test pipeline minus its final transform, then load
# the text prompts and pack the sample with the 'texts' meta key included.
test_pipeline = [
    *_base_.test_pipeline[:-1],
    dict(type='LoadText'),
    dict(type='mmdet.PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                    'scale_factor', 'pad_param', 'texts'))
]
# Validation dataset, mirroring the training dataset but pointing at the
# val split and using the test pipeline.
# `_delete_=True` replaces the inherited dataset dict instead of merging.
coco_val_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        metainfo=dict(classes=['crop']),
        data_root=r'C:\Users\lenovo\Desktop\self_coco',
        ann_file=r'C:\Users\lenovo\Desktop\self_coco\val.json',
        data_prefix=dict(img=r'C:\Users\lenovo\Desktop\self_coco\val'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path=r'C:\Users\lenovo\Desktop\self_coco\coco_class_text.json',
    pipeline=test_pipeline)
val_dataloader = dict(dataset=coco_val_dataset)
# Testing re-uses the validation dataloader unchanged.
test_dataloader = val_dataloader
# training settings
# Overrides for the inherited default hooks: the LR scheduler hook is given
# the total epoch count, and checkpoints are written every
# `save_epoch_intervals` epochs.
default_hooks = dict(
    param_scheduler=dict(scheduler_type='linear',
                         lr_factor=0.01,
                         max_epochs=max_epochs),
    checkpoint=dict(max_keep_ckpts=-1,
                    save_best=True,
                    interval=save_epoch_intervals))
# Extra training hooks: an EMA hook and a pipeline switch hook.
custom_hooks = [
dict(type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0001,
update_buffers=True,
strict_load=False,
priority=49),
# Swap the training pipeline to `train_pipeline_stage2` at epoch
# (max_epochs - close_mosaic_epochs).
dict(type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - close_mosaic_epochs,
switch_pipeline=train_pipeline_stage2)
]
# Validate every epoch; from epoch (max_epochs - close_mosaic_epochs) on,
# use the stage-2 validation interval from the inherited base config.
train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=1,
    dynamic_intervals=[((max_epochs - close_mosaic_epochs),
                        _base_.val_interval_stage2)])
# Optimizer: SGD with Nesterov momentum.
# `_delete_=True` replaces the inherited optimizer dict instead of merging
# stale keys from the base optimizer into it.
optim_wrapper = dict(
    optimizer=dict(
        _delete_=True,
        type='SGD',
        lr=base_lr,
        momentum=0.937,
        nesterov=True,
        weight_decay=weight_decay,
        batch_size_per_gpu=train_batch_size_per_gpu),
    # Per-parameter overrides: reduced LR multiplier for the text model and
    # no weight decay on `logit_scale`.
    paramwise_cfg=dict(
        custom_keys={
            'backbone.text_model': dict(lr_mult=0.01),
            'logit_scale': dict(weight_decay=0.0)
        }),
    constructor='YOLOWv5OptimizerConstructor')
# evaluation settings
# COCO bbox evaluation against the validation annotation file.
# `_delete_=True` replaces the inherited evaluator dict instead of merging.
# NOTE(review): COCO metrics of -1.000 usually mean no ground-truth entries
# were found for the evaluated categories/images - confirm that the
# category names and image ids in val.json match the dataset `metainfo`
# classes and the predictions.
val_evaluator = dict(
    _delete_=True,
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    ann_file=r'C:\Users\lenovo\Desktop\self_coco\val.json',
    metric='bbox')
`
This is my console output:
12/10 16:18:08 - mmengine - INFO - Epoch(train) [1][ 50/240] base_lr: 1.0000e-03 lr: 4.9000e-05 eta: 1 day, 0:42:29 time: 0.7416 data_time: 0.4533 memory: 3570 grad_norm: nan loss: 13.6729 loss_cls: 7.2828 loss_bbox: 3.7713 loss_dfl: 2.6188 12/10 16:18:14 - mmengine - INFO - Epoch(train) [1][100/240] base_lr: 1.0000e-03 lr: 9.9000e-05 eta: 14:12:23 time: 0.1115 data_time: 0.0060 memory: 1846 grad_norm: 755.3231 loss: 13.5571 loss_cls: 7.1307 loss_bbox: 3.8186 loss_dfl: 2.6078 12/10 16:18:19 - mmengine - INFO - Epoch(train) [1][150/240] base_lr: 1.0000e-03 lr: 1.4900e-04 eta: 10:45:13 time: 0.1159 data_time: 0.0052 memory: 1846 grad_norm: inf loss: 13.8473 loss_cls: 7.8785 loss_bbox: 3.5179 loss_dfl: 2.4508 12/10 16:18:25 - mmengine - INFO - Epoch(train) [1][200/240] base_lr: 1.0000e-03 lr: 1.9900e-04 eta: 9:02:49 time: 0.1184 data_time: 0.0071 memory: 1842 grad_norm: 654.9941 loss: 15.0733 loss_cls: 7.4990 loss_bbox: 4.4653 loss_dfl: 3.1090 12/10 16:18:30 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_sgd_1e-3_40e_8gpus_finetune_coco_20241210_161716 12/10 16:18:38 - mmengine - INFO - Epoch(val) [1][50/60] eta: 0:00:01 time: 0.1562 data_time: 0.1013 memory: 3409 12/10 16:18:39 - mmengine - INFO - Evaluating bbox... Loading and preparing results... DONE (t=0.00s) creating index... index created! Running per image evaluation... Evaluate annotation type *bbox* DONE (t=0.02s). Accumulating evaluation results... DONE (t=0.00s). 
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 12/10 16:18:39 - mmengine - INFO - bbox_mAP_copypaste: -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 12/10 16:18:39 - mmengine - INFO - Epoch(val) [1][60/60] coco/bbox_mAP: -1.0000 coco/bbox_mAP_50: -1.0000 coco/bbox_mAP_75: -1.0000 coco/bbox_mAP_s: -1.0000 coco/bbox_mAP_m: -1.0000 coco/bbox_mAP_l: -1.0000 data_time: 0.0844 time: 0.1357
The text was updated successfully, but these errors were encountered: