kadirnar · kadirnar · Apr 9, 2023 · Apr 9, 2023 · Apr 9, 2023
diff --git a/README.md b/README.md
@@ -51,5 +51,6 @@ autoseg_video = SegAutoMaskGenerator().save_video(
 - [x] Support for video files
 - [x] Support for pip installation
 - [x] Support for web application
+- [x] Support for box to polygon conversion
 - [x] Support for automatic download model weights
 
diff --git a/metaseg/__init__.py b/metaseg/__init__.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
+from metaseg.auto_mask_demo import SegAutoMaskGenerator
 from metaseg.automatic_mask_generator import SamAutomaticMaskGenerator
 from metaseg.build_sam import build_sam, build_sam_vit_b, build_sam_vit_h, build_sam_vit_l, sam_model_registry
-from metaseg.demo import SegAutoMaskGenerator
 from metaseg.predictor import SamPredictor
 
-__version__ = "0.3.0"
+__version__ = "0.3.2"
diff --git a/metaseg/demo.py → metaseg/auto_mask_demo.py b/metaseg/demo.py → metaseg/auto_mask_demo.py
@@ -5,6 +5,7 @@
 from metaseg import SamAutomaticMaskGenerator, sam_model_registry
 from metaseg.utils import download_model, load_image, load_video
 
+
 class SegAutoMaskGenerator:
     def __init__(self):
         self.model = None
@@ -21,8 +22,9 @@ def load_model(self, model_type):
     def predict(self, frame, model_type, points_per_side, points_per_batch, min_area):
         model = self.load_model(model_type)
         mask_generator = SamAutomaticMaskGenerator(
-            model, points_per_side=points_per_side, points_per_batch=points_per_batch, min_mask_region_area=min_area)
-
+            model, points_per_side=points_per_side, points_per_batch=points_per_batch, min_mask_region_area=min_area
+        )
+
         masks = mask_generator.generate(frame)
 
         return frame, masks

diff --git a/metaseg/manuel_mask_demo.py b/metaseg/manuel_mask_demo.py
@@ -0,0 +1,101 @@
+import cv2
+import numpy as np
+import torch
+
+from metaseg import SamPredictor, sam_model_registry
+from metaseg.utils import download_model, load_image
+
+
+class SegManualMaskGenerator:
+    """Segment the object inside a user-supplied box with SAM and save an overlay image."""
+
+    def __init__(self):
+        # The model is created on the first load_model() call.
+        self.model = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    def load_model(self, model_type):
+        """Return the SAM model, building it on first use.
+
+        The checkpoint path comes from ``download_model(model_type)``. Once a
+        model has been built it is reused on later calls, whatever
+        ``model_type`` they pass.
+        """
+        if self.model is None:
+            self.model_path = download_model(model_type)
+            self.model = sam_model_registry[model_type](checkpoint=self.model_path)
+            self.model.to(device=self.device)
+
+        return self.model
+
+    @staticmethod
+    def load_mask(mask, random_color=True):
+        """Turn a mask into a 3-channel ``uint8`` colour image.
+
+        Args:
+            mask: Array whose last two dimensions are (height, width) and that
+                holds exactly height * width elements.
+            random_color: Pick a random colour when true; otherwise use the
+                fixed colour (255, 200, 0).
+
+        Returns:
+            Array of shape (height, width, 3): ``mask`` multiplied by the colour.
+        """
+        if random_color:
+            color = np.random.rand(3) * 255
+        else:
+            color = np.array([255, 200, 0])
+        h, w = mask.shape[-2:]
+        mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+        mask_image = mask_image.astype(np.uint8)
+        return mask_image
+
+    @staticmethod
+    def load_box(box, image):
+        """Draw ``box`` (x0, y0, x1, y1) on ``image`` in place and return ``image``."""
+        # cv2.rectangle needs plain integer points; the coordinates may arrive
+        # as NumPy scalars or floats.
+        x0, y0, x1, y1 = (int(v) for v in box[:4])
+        cv2.rectangle(image, (x0, y0), (x1, y1), (0, 255, 0), 2)
+        return image
+
+    def predict(self, frame, model_type, x0, y0, x1, y1):
+        """Run SAM on ``frame`` with the box (x0, y0, x1, y1) as the only prompt.
+
+        Returns:
+            Tuple ``(frame, masks, input_box)``, where ``masks`` is the first
+            value returned by ``SamPredictor.predict`` and ``input_box`` is the
+            box as a 1-D NumPy array.
+        """
+        model = self.load_model(model_type)
+        predictor = SamPredictor(model)
+        predictor.set_image(frame)
+        input_box = np.array([x0, y0, x1, y1])
+        masks, _, _ = predictor.predict(
+            point_coords=None,
+            point_labels=None,
+            box=input_box[None, :],
+            multimask_output=False,
+        )
+
+        return frame, masks, input_box
+
+    def save_image(self, source, model_type, x0, y0, x1, y1):
+        """Segment the boxed region of ``source`` and write the overlay to ``output.jpg``.
+
+        Returns:
+            ``"output.jpg"``, or ``None`` when no mask was predicted.
+        """
+        read_image = load_image(source)
+        image, anns, input_box = self.predict(read_image, model_type, x0, y0, x1, y1)
+        if len(anns) == 0:
+            return None
+
+        mask_image = self.load_mask(anns, random_color=True)
+        image = self.load_box(input_box, image)
+        combined_mask = cv2.add(image, mask_image)
+        # load_image() returns RGB, but cv2.imwrite expects BGR channel order.
+        combined_mask = cv2.cvtColor(combined_mask, cv2.COLOR_RGB2BGR)
+        cv2.imwrite("output.jpg", combined_mask)
+
+        return "output.jpg"
diff --git a/metaseg/utils/__init__.py b/metaseg/utils/__init__.py
@@ -4,5 +4,5 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
-from metaseg.utils.file_utils import download_model
 from metaseg.utils.data_utils import load_image, load_video
+from metaseg.utils.file_utils import download_model
diff --git a/metaseg/utils/data_utils.py b/metaseg/utils/data_utils.py
@@ -1,10 +1,12 @@
 import cv2
 
+
 def load_image(image_path):
     image = cv2.imread(image_path)
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     return image
 
+
 def load_video(video_path):
     cap = cv2.VideoCapture(video_path)
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

diff --git a/metaseg/webapp/__init__.py b/metaseg/webapp/__init__.py
diff --git a/metaseg/app.py → metaseg/webapp/app.py b/metaseg/app.py → metaseg/webapp/app.py
@@ -36,11 +36,11 @@ def image_app():
                             value=64,
                             label="Points per Batch",
                         )
-                    
+
                         seg_automask_image_min_area = gr.Number(
                             value=0,
                             label="Min Area",
-                        )  
+                        )
 
                 seg_automask_image_predict = gr.Button(value="Generator")
 
@@ -99,8 +99,6 @@ def video_app():
                                     label="Min Area",
                                 )
 
-
-
                 seg_automask_video_predict = gr.Button(value="Generator")
             with gr.Column():
                 output_video = gr.Video()