Merge pull request ultralytics#17 from Laughing-q/instance_seg

updates
Author: zldrobit · Aug 14, 2022
Commit: 017e2cf (2 parents: a271758 + bd2e4cd)

Showing 7 changed files with 27 additions and 148 deletions.
diff --git a/segment/train.py b/segment/train.py
@@ -68,7 +68,6 @@
 from datetime import datetime
 
 def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
-    print(device)
     save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
         opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio
@@ -419,7 +418,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                                            single_cls=single_cls,
                                            dataloader=val_loader,
                                            save_dir=save_dir,
-                                           plots=False,
+                                           plots=plots,
                                            callbacks=callbacks,
                                            compute_loss=compute_loss, 
                                            mask_downsample_ratio=mask_ratio,
@@ -485,7 +484,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                         plots=plots,
                         callbacks=callbacks,
                         compute_loss=compute_loss,
-                        mask_downsample_ratio=1,
+                        mask_downsample_ratio=mask_ratio,
                         overlap=overlap)  # val best model with plots
                     if is_coco:
                         callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)

diff --git a/segment/val.py b/segment/val.py
@@ -300,7 +300,7 @@ def run(
                             shape=im[si].shape[1:]).permute(2, 0, 1).contiguous()
             if plots and batch_i < 3:
                 # filter top 15 to plot
-                plot_masks.append(pred_masks[:15].cpu())
+                plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu())
 
             # Predictions
             if single_cls:
@@ -388,8 +388,7 @@ def run(
     # Save JSON
     if save_json and len(jdict):
         w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
-        # anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
-        anno_json = "/d/dataset/COCO/annotations/instances_val2017.json"
+        anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
         pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
         LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...')
         with open(pred_json, 'w') as f:

diff --git a/utils/dataloaders.py b/utils/dataloaders.py
@@ -135,8 +135,8 @@ def create_dataloader(path,
     nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
     sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
     loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
-    generator = torch.Generator()
-    generator.manual_seed(0)
+    # generator = torch.Generator()
+    # generator.manual_seed(0)
     return loader(dataset,
                   batch_size=batch_size,
                   shuffle=shuffle and sampler is None,
@@ -145,7 +145,8 @@ def create_dataloader(path,
                   pin_memory=True,
                   collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn,
                   worker_init_fn=seed_worker,
-                  generator=generator), dataset
+                  # generator=generator,
+                  ), dataset
 
 
 class InfiniteDataLoader(dataloader.DataLoader):

diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py
@@ -81,37 +81,20 @@ def random_perspective(im,
     n = len(targets)
     new_segments = []
     if n:
-        use_segments = any(x.any() for x in segments)
         new = np.zeros((n, 4))
-        if use_segments:  # warp segments
-            segments = resample_segments(segments)  # upsample
-            for i, segment in enumerate(segments):
-                xy = np.ones((len(segment), 3))
-                xy[:, :2] = segment
-                xy = xy @ M.T  # transform
-                xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2])  # perspective rescale or affine
-
-                # clip
-                new[i] = segment2box(xy, width, height)
-                new_segments.append(xy)
-
-        else:  # warp boxes
-            xy = np.ones((n * 4, 3))
-            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+        segments = resample_segments(segments)  # upsample
+        for i, segment in enumerate(segments):
+            xy = np.ones((len(segment), 3))
+            xy[:, :2] = segment
             xy = xy @ M.T  # transform
-            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
-
-            # create new boxes
-            x = xy[:, [0, 2, 4, 6]]
-            y = xy[:, [1, 3, 5, 7]]
-            new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T)
+            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2])  # perspective rescale or affine
 
             # clip
-            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
-            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
+            new[i] = segment2box(xy, width, height)
+            new_segments.append(xy)
 
         # filter candidates
-        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
+        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01)
         targets = targets[i]
         targets[:, 1:5] = new[i]
         new_segments = np.array(new_segments)[i]

diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py
@@ -62,8 +62,8 @@ def create_dataloader(path,
     nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
     sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
     loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
-    generator = torch.Generator()
-    generator.manual_seed(0)
+    # generator = torch.Generator()
+    # generator.manual_seed(0)
     return loader(dataset,
                   batch_size=batch_size,
                   shuffle=shuffle and sampler is None,
@@ -72,7 +72,8 @@ def create_dataloader(path,
                   pin_memory=True,
                   collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
                   worker_init_fn=seed_worker,
-                  generator=generator), dataset
+                  # generator=generator,
+                  ), dataset
 
 
 class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels):  # for training/testing

diff --git a/utils/segment/loss.py b/utils/segment/loss.py
@@ -92,7 +92,7 @@ def __call__(self, preds, targets, masks):  # predictions, targets, model
                 if self.sort_obj_iou:
                     sort_id = torch.argsort(score_iou)
                     b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],)
-                tobj[b, a, gj, gi] = 0.5 * ((1.0 - self.gr) + self.gr * score_iou)  # iou ratio
+                tobj[b, a, gj, gi] = 1.0 * ((1.0 - self.gr) + self.gr * score_iou)  # iou ratio
 
                 # Classification
                 if self.nc > 1:  # cls loss (only if multiple classes)
@@ -131,12 +131,12 @@ def __call__(self, preds, targets, masks):  # predictions, targets, model
                     psi = ps[index][:, 5: self.nm]
                     proto = proto_out[bi]
 
-                    one_lseg, iou = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh)
+                    one_lseg = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh)
                     batch_lseg += one_lseg
 
-                    # update tobj
-                    iou = iou.detach().clamp(0).type(tobj.dtype)
-                    tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0]
+                    # # update tobj
+                    # iou = iou.detach().clamp(0).type(tobj.dtype)
+                    # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0]
 
                 lseg += batch_lseg / len(b.unique())
 
@@ -161,11 +161,11 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h):
         # (80, 80, 32) @ (32, n) -> (80, 80, n)
         pred_mask = proto @ pred.tanh().T
         # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy)
-        iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True)
+        # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True)
         lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
         lseg = crop(lseg, xyxy)
         lseg = lseg.mean(dim=(0, 1)) / w / h
-        return lseg.mean(), iou# + lseg_iou.mean()
+        return lseg.mean()#, iou# + lseg_iou.mean()
 
     def build_targets(self, p, targets):
         # Build targets for compute_loss(), input targets(image,class,x,y,w,h)

diff --git a/utils/segment/plots.py b/utils/segment/plots.py
@@ -201,110 +201,6 @@ def plot_images_and_masks(
             im.save(fname)
     return mosaic
 
-# def plot_images_and_masks(
-#     images,
-#     targets,
-#     masks,
-#     paths=None,
-#     fname="images.jpg",
-#     names=None,
-#     max_size=640,
-#     max_subplots=16,
-# ):
-#     # plot masks first in torch way,
-#     # this is faster if masks are in cuda.
-#     masks = torch.as_tensor(masks, dtype=torch.float32)
-#     images = torch.as_tensor(images, dtype=torch.float32, device=masks.device)
-#     if isinstance(targets, torch.Tensor):
-#         targets = targets.cpu().numpy()
-#
-#     # normalize
-#     if images[0].max() > 1:
-#         images /= 255
-#
-#     images_with_masks = []
-#     for i, img in enumerate(images):
-#         if len(targets) == 0:
-#             continue
-#         idx = (targets[:, 0]).astype(int)
-#         image_targets = targets[idx == i]
-#         mcolors = np.array([colors(int(cls), bgr=True) for cls in image_targets[:, 1]])
-#         labels = image_targets.shape[1] == 6  # labels if no conf column
-#         conf = (
-#             None if labels else image_targets[:, 6]
-#         )  # check for confidence presence (label vs pred)
-#
-#         if masks.max() > 1.0:  # mean that masks are overlap
-#             image_masks = masks[[i]]  # (1, 640, 640)
-#             # convert masks (1, 640, 640) -> (n, 640, 640)
-#             nl = len(image_targets)
-#             index = torch.arange(nl, device=image_masks.device).view(nl, 1, 1) + 1
-#             image_masks = image_masks.repeat(nl, 1, 1)
-#             image_masks = torch.where(image_masks == index, 1.0, 0.0)
-#         else:
-#             image_masks = masks[idx == i]
-#         if conf is not None:
-#             image_masks = image_masks[conf > 0.25]
-#             mcolors = mcolors[conf > 0.25]
-#         image_with_masks = plot_masks(img, image_masks, mcolors)
-#         images_with_masks.append(image_with_masks[..., ::-1])
-#     images = np.stack(images_with_masks, axis=0)
-#
-#     bs, h, w, _,= images.shape  # batch size, _, height, width
-#     bs = min(bs, max_subplots)  # limit plot images
-#     ns = np.ceil(bs ** 0.5)  # number of subplots (square)
-#
-#     mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
-#     for i, im in enumerate(images):
-#         if i == max_subplots:  # if last batch has fewer images than we expect
-#             break
-#         x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
-#         mosaic[y : y + h, x : x + w, :] = im
-#
-#     # Resize (optional)
-#     scale = max_size / ns / max(h, w)
-#     if scale < 1:
-#         h = math.ceil(scale * h)
-#         w = math.ceil(scale * w)
-#         mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
-#
-#     # Annotate
-#     fs = int((h + w) * ns * 0.01)  # font size
-#     annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True)
-#     for i in range(i + 1):
-#         x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
-#         annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
-#         if paths:
-#             annotator.text(
-#                 (x + 5, y + 5 + h),
-#                 text=Path(paths[i]).name[:40],
-#                 txt_color=(220, 220, 220),
-#             )  # filenames
-#         if len(targets) > 0:
-#             ti = targets[targets[:, 0] == i]  # image targets
-#             boxes = xywh2xyxy(ti[:, 2:6]).T
-#             classes = ti[:, 1].astype("int")
-#             labels = ti.shape[1] == 6  # labels if no conf column
-#             conf = None if labels else ti[:, 6]  # check for confidence presence (label vs pred)
-#
-#             if boxes.shape[1]:
-#                 if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
-#                     boxes[[0, 2]] *= w  # scale to pixels
-#                     boxes[[1, 3]] *= h
-#                 elif scale < 1:  # absolute coords need scale if image scales
-#                     boxes *= scale
-#             boxes[[0, 2]] += x
-#             boxes[[1, 3]] += y
-#             for j, box in enumerate(boxes.T.tolist()):
-#                 cls = classes[j]
-#                 color = colors(cls)
-#                 cls = names[cls] if names else cls
-#                 if labels or conf[j] > 0.25:  # 0.25 conf thresh
-#                     label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}"
-#                     annotator.box_label(box, label, color=color)
-#     annotator.im.save(fname)  # save
-#     return annotator.result()
-
 
 def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
     # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')