Merge pull request #2 from uug-ai/feature-integrate-roboflow
Feature integrate roboflow
cedricve authored Aug 20, 2024
2 parents f4d6634 + 2757622 commit 3b64320
Showing 9 changed files with 321 additions and 77 deletions.
11 changes: 8 additions & 3 deletions .env
@@ -3,12 +3,12 @@
MODEL_NAME = "yolov8n.pt"
CONDITION = "1 persons detected" # or "5 cars detected"

# Forwarding
FORWARDING_MEDIA = "True"
REMOVE_AFTER_PROCESSED = "True"

# Queue parameters
QUEUE_NAME="data-filtering"
QUEUE_NAME="data-harvesting"
TARGET_QUEUE_NAME=""
QUEUE_EXCHANGE=""
QUEUE_HOST="rabbitmq-broker.xxx.xx:5672"
@@ -20,6 +20,11 @@ STORAGE_URI="https://vault.xxx.xx/api"
STORAGE_ACCESS_KEY="xxxx"
STORAGE_SECRET_KEY="xxx"

# Roboflow parameters
RBF_API_KEY = "xxx"
RBF_WORKSPACE = "xxx"
RBF_PROJECT = "xxx"

# Feature parameters
PLOT = "True"
SAVE_VIDEO = "True"
@@ -33,4 +38,4 @@ LOGGING = "True"
CLASSIFICATION_FPS = "5"
CLASSIFICATION_THRESHOLD = "0.2"
MAX_NUMBER_OF_PREDICTIONS = "100"
ALLOWED_CLASSIFICATIONS = "0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 28"
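
The new Roboflow variables are consumed by the project's configuration layer; below is a minimal, hedged sketch of how they might be read on the Python side (the actual `VariableClass` attribute names may differ).

```python
# Hedged sketch: reading the Roboflow and queue settings from the environment.
# The project loads these through utils/VariableClass.py, whose attribute names may differ.
import os

RBF_API_KEY = os.environ.get("RBF_API_KEY", "")
RBF_WORKSPACE = os.environ.get("RBF_WORKSPACE", "")
RBF_PROJECT = os.environ.get("RBF_PROJECT", "")
QUEUE_NAME = os.environ.get("QUEUE_NAME", "data-harvesting")

if not RBF_API_KEY:
    raise RuntimeError("RBF_API_KEY must be set before uploading to Roboflow")
```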
5 changes: 2 additions & 3 deletions Dockerfile
@@ -43,7 +43,7 @@ ENV MEDIA_SAVEPATH "/ml/data/input/input_video.mp4"
ENV MODEL_NAME "yolov8n.pt"

# Queue parameters
ENV QUEUE_NAME ""
ENV TARGET_QUEUE_NAME ""
ENV QUEUE_EXCHANGE ""
ENV QUEUE_HOST ""
@@ -83,6 +83,5 @@ ENV ALLOWED_CLASSIFICATIONS "0, 1, 2, 3, 5, 7, 14, 15, 16, 24, 26, 28"


# Run the application
ENTRYPOINT ["python" , "queue_filter.py"]

ENTRYPOINT ["python" , "queue_harvesting.py"]

31 changes: 30 additions & 1 deletion README.md
@@ -10,4 +10,33 @@ Automated dataset harvesting involves using algorithms and tools to continuously

## What's in this project

@ TODO
This project contains several key components responsible for different stages of the data harvesting process:
- `single-shot.py`: This is a test version of the harvesting process, designed to work with a single video. It allows you to test and validate the extraction and processing pipeline on a smaller scale before applying to the scaled up `queue_harvesting.py`.
- `queue_harvesting.py`: The main script of the project. It reads keys from the message broker and from your vault, processes the corresponding videos, extracts relevant frames, and uploads them to Roboflow. This script orchestrates the entire data harvesting process, ensuring seamless integration with cloud resources and data repositories.
- `condition.py`: This script is responsible for frame extraction and parameter configuration. It defines the conditions under which frames are extracted from videos, ensuring only relevant data is processed. In this project we use dual validation: each frame is passed to two models (which can be the same or different models, as long as they share the same classes).
  - For example, if model 1 has three classes (0 - Person, 1 - Helmet, 2 - Head), model 2 must have the same classes in the same order (a quick check is sketched after this list).
- `.env`: This file contains environment-specific variables that are used to configure the scripts without hard-coding sensitive information. Typical variables might include API keys, database URLs, or credentials needed to access cloud services. Ensure that this file is properly configured before running the scripts, and keep it secure to prevent unauthorized access.
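
As a quick illustration of the shared-class requirement above (a hedged sketch; the weight file names are placeholders, and the real scripts may validate this differently):

```python
# Hypothetical check that both models expose the same class mapping,
# e.g. {0: 'Person', 1: 'Helmet', 2: 'Head'} for both.
from ultralytics import YOLO

model1 = YOLO("yolov8n.pt")
model2 = YOLO("helmet_model.pt")  # different weights are fine as long as the classes match

assert model1.names == model2.names, "Dual validation requires identical class lists"
```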


### `single-shot.py`
- Let's start with the test version that works with a single video.
- Environment Variables Setup: The script initializes environment variables through the `VariableClass`, which holds the important configuration read from the `.env` file.
- We use YOLO (You Only Look Once) models for object detection: each frame is predicted by both models, and the results are compared to keep the detection with higher accuracy; see [condition.py](#conditionpy).
  The device (CPU or GPU) is selected automatically based on availability.
- Object Classification:
- We process the video by skipping frames based on the configured `frame_skip_factor` and `skip_frames_counter` to optimize performance; since we want the dataset to be diverse, two consecutive frames hold little additional information about the object (a sketch of this loop follows at the end of this section).
- For each processed frame, the `processFrame` function is used to detect objects and check whether the condition is met (e.g., detecting a certain number of people); see [condition.py](#conditionpy).
- If the condition is met, the relevant frame and associated labels/boxes are saved, and further frames are skipped for efficiency.
- Result handling:
- If any frames meet the condition, they are uploaded to Roboflow.
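
A condensed, hedged sketch of the sampling loop described above (variable names mirror the README; the actual `single-shot.py` may differ in detail):

```python
# Hedged sketch of the frame-sampling loop in single-shot.py.
import cv2
from ultralytics import YOLO
from condition import processFrame

MODEL = YOLO("yolov8n.pt")
MODEL2 = YOLO("yolov8n.pt")  # placeholder; must share the same classes as MODEL

cap = cv2.VideoCapture("input_video.mp4")
frame_skip_factor = 5      # only every 5th frame is considered
skip_frames_counter = 0    # extra frames to skip after a condition hit
frame_number = 0

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame_number += 1

    # Skip ahead after a hit, and skip frames that fall between sampling points.
    if skip_frames_counter > 0:
        skip_frames_counter -= 1
        continue
    if frame_number % frame_skip_factor != 0:
        continue

    # processFrame runs both models and checks the condition (see condition.py).
    frame, _, condition_met, labels_and_boxes = processFrame(MODEL, MODEL2, frame)
    if condition_met:
        # Save the frame and its labels for the Roboflow upload, then jump ahead for diversity.
        skip_frames_counter = 50

cap.release()
```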

### `queue_harvesting.py`
- Scaled-up version of `single-shot.py`. The code logic is the same except for the message broker connection, which lets it continuously process videos from the cloud.
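
A hedged sketch of what the broker loop might look like (the queue settings in `.env` point at RabbitMQ; the client library, credentials, and message schema below are assumptions):

```python
# Hypothetical consumer: pull video keys from RabbitMQ and run the harvesting pipeline.
import json
import os
import pika

params = pika.URLParameters(f"amqp://guest:guest@{os.environ['QUEUE_HOST']}")  # credentials assumed
connection = pika.BlockingConnection(params)
channel = connection.channel()

def on_message(ch, method, properties, body):
    message = json.loads(body)  # assumed to carry the storage key of a video in the vault
    # download the video, run the same pipeline as single-shot.py, upload results to Roboflow
    ch.basic_ack(delivery_tag=method.delivery_tag)

channel.basic_consume(queue=os.environ["QUEUE_NAME"], on_message_callback=on_message)
channel.start_consuming()
```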

### `condition.py`
- The file is responsible for processing video frames to detect objects using pre-trained models (YOLO) and applying custom logic to determine if certain conditions are met (e.g., detecting a specific number of objects with certain attributes). If the conditions are met, the relevant frames and associated labels/boxes are prepared for further processing or storage.
- Frame Processing:
- `processFrame` function: This is the core function that takes a video frame, performs object classification using two YOLO models, and checks whether the detection results satisfy predefined conditions.
- YOLO Object Tracking: The `MODEL.track()` method is used to perform object detection on the frame. Tracking is enabled to maintain unique IDs for detected objects across frames.
- For more information and parameters, see [YOLOv8 model.track](https://docs.ultralytics.com/modes/track/) (a minimal call is sketched at the end of this section).
- Dual-Model Support: We use a secondary model (MODEL2) to enhance the detection process by combining results from two different models.
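
A minimal, hedged example of the tracking call described above (parameter values mirror those added in this PR; the input frame here is a placeholder):

```python
import cv2
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
frame = cv2.imread("example_frame.jpg")  # in the pipeline, frames come from the video stream

# persist=True keeps tracker state so detections get stable IDs across frames.
results = model.track(
    source=frame,
    persist=True,
    verbose=False,
    iou=0.85,
    conf=0.2,  # e.g. CLASSIFICATION_THRESHOLD from .env
)
results = results[0]  # track() returns a list of Result objects
print(results.boxes)
```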
94 changes: 67 additions & 27 deletions condition.py
@@ -10,7 +10,7 @@
# Function to process the frame.


def processFrame(MODEL, frame, video_out):
def processFrame(MODEL, MODEL2, frame, video_out='', frames_out=''):
# Perform object classification on the frame.
# persist=True -> The tracking results are stored in the model.
# persist should be kept True, as this provides unique IDs for each detection.
@@ -25,7 +25,17 @@ def processFrame(MODEL, frame, video_out):
source=frame,
persist=True,
verbose=False,
iou=0.85,
conf=var.CLASSIFICATION_THRESHOLD)
results2 = None
if MODEL2:
results2 = MODEL2.track(
source=frame,
persist=True,
verbose=False,
iou=0.85,
conf=var.CLASSIFICATION_THRESHOLD)
results2 = results2[0]

if var.TIME_VERBOSE:
total_time_class_prediction += time.time() - start_time_class_prediction
@@ -36,40 +46,70 @@
# Check if the results are not None,
#  Otherwise, the postprocessing should not be done.
# Iterate over the detected objects and their masks.
results = results[0]  # Pick the first element since track() returns a list of Result objects, not a single Result

annotated_frame = frame.copy()

# Empty frame containing labels with bounding boxes
labelsAndBoxes = []

if results is not None:

# Using the results of the classification, we can verify if we have a condition met.
# We can look for example for people who are:
# - not wearing a helmet,
# - people with a blue shirt,
# - cars driving in the opposite direction,
# - etc.
# You are in the driving seat so you can write your custom code to detect the condition
# you are looking for.

is_condition_met = False
# Write your function here or custom code and set is_condition_met to True if the condition is met.
# is_condition_met = condition(results)
# labelsAndBoxes = ....

if is_condition_met:
print("Condition met, we are gathering the labelsAndBoxes and return results")
return frame, total_time_class_prediction, True, labelsAndBoxes
else:
print("Condition not met")
labels_and_boxes = ''

# if results is not None:
# # Using the results of the classification, we can verify if we have a condition met.
# # We can look for example for people who are:
# # - not wearing a helmet,
# # - people with a blue shirt,
# # - cars driving in the opposite direction,
# # - etc.
# # You are in the driving seat so you can write your custom code to detect the condition
# # you are looking for.
# if len(results.boxes) >= var.MIN_DETECTIONS: # If there are at least 5 boxes found (Could belong to either class)
# print("Condition met, we are gathering the labels and boxes and return results")
# # Extract label and boxes from result in YOLOv8 format
# for cls_item, xywhn_item in zip(results.boxes.cls.tolist(), results.boxes.xywhn):
# labels_and_boxes = labels_and_boxes + f'{int(cls_item)} {xywhn_item[0]} {xywhn_item[1]} {xywhn_item[2]} {xywhn_item[3]}\n'
#
# return frame, total_time_class_prediction, True, labels_and_boxes
# else:
# print("Condition not met, skipping frame")

if results is not None or results2 is not None:
combined_results = []

# Check the condition to decide whether to keep the frame.
# Since we process over 1k videos per day, the dataset we collect needs to be high quality.
# A valid image needs to:
# + have at least MIN_DETECTIONS objects detected,
# + contain a helmet (since we are short on helmet data),
# + have an equal number of helmets and persons detected (to make sure every person wearing a helmet is detected).
if (len(results.boxes) > 0
and len(results2.boxes) > 0
and (any(box.cls == 1 for box in results2.boxes)
or any(box.cls == 2 for box in results.boxes))
and sum(box.cls == 1 for box in results.boxes) == sum(box.cls == 2 for box in results.boxes)):
for box1, box2 in zip(results.boxes, results2.boxes):
if box1.cls == box2.cls:
avg_conf = (box1.conf + box2.conf) / 2
if box1.conf >= box2.conf:
combined_results.append((box1.xywhn, box1.cls, avg_conf))
else:
combined_results.append((box2.xywhn, box2.cls, avg_conf))

# Add any remaining boxes from model 1 or model 2 if their counts are different
combined_results += [(box.xywhn, box.cls, box.conf) for box in results.boxes[len(combined_results):]]
combined_results += [(box.xywhn, box.cls, box.conf) for box in results2.boxes[len(combined_results):]]

if len(combined_results) >= var.MIN_DETECTIONS: # If the combined result has at least MIN_DETECTIONS boxes (could belong to either class)
print("Condition met, we are gathering the labels and boxes and return results")
for xywhn, cls, conf in combined_results:
labels_and_boxes += f'{int(cls[0])} {xywhn[0, 0].item()} {xywhn[0, 1].item()} {xywhn[0, 2].item()} {xywhn[0, 3].item()}\n'
return frame, total_time_class_prediction, True, labels_and_boxes

# Annotate the frame with the classification objects.
# Draw the class name and the confidence on the frame.
if var.SAVE_VIDEO or var.PLOT:
for box, mask in zip(results[0].boxes, results[0].masks or [None] * len(results[0].boxes)):
for box, mask in zip(results.boxes, results.masks or [None] * len(results.boxes)):
# Translate the class name to a human-readable format and display it on the frame.
object_name = translate(results[0].names[int(box.cls)])
object_name = translate(results.names[int(box.cls)])
cv2.putText(
img=annotated_frame,
text=object_name,
@@ -103,4 +143,4 @@ def processFrame(MODEL, frame, video_out):
video_out.write(
annotated_frame) if var.SAVE_VIDEO else None

return frame, total_time_class_prediction, False, labelsAndBoxes
return frame, total_time_class_prediction, False, labels_and_boxes
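
The `labels_and_boxes` string built above follows the YOLO annotation format; a hedged sketch of persisting an accepted frame alongside it (the output layout is an assumption, not defined in this diff):

```python
# Hypothetical helper: save one accepted frame and its YOLO-format labels.
import os
import cv2

def save_sample(frame, labels_and_boxes, out_dir, stem):
    os.makedirs(out_dir, exist_ok=True)
    cv2.imwrite(os.path.join(out_dir, f"{stem}.jpg"), frame)
    # Each line: "<class_id> <x_center> <y_center> <width> <height>", normalized to [0, 1].
    with open(os.path.join(out_dir, f"{stem}.txt"), "w") as f:
        f.write(labels_and_boxes)
```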
46 changes: 46 additions & 0 deletions connections/roboflow_helper.py
@@ -0,0 +1,46 @@
import shutil
from os.path import basename as pbasename

import roboflow

from utils.VariableClass import VariableClass


var = VariableClass()
class RoboflowHelper:
def __init__(self):
self.agent, self.ws, self.project = self.__login__

@property
def __login__(self):
try:
# Attempt to initialize Roboflow with the API key
agent = roboflow.Roboflow(api_key=var.ROBOFLOW_API_KEY)

# Access the workspace
workspace = agent.workspace(var.ROBOFLOW_WORKSPACE)

# Access the project
project = workspace.project(var.ROBOFLOW_PROJECT)

return agent, workspace, project

except Exception as e:
# Handle any exceptions
raise ConnectionRefusedError(f'Error during Roboflow login: {e}')

def upload_dataset(self, src_project_path):
# Upload data set to an existing project
self.ws.upload_dataset(
src_project_path,
pbasename(self.project.id),
num_workers=10,
project_license="MIT",
project_type="object-detection",
batch_name=None,
num_retries=0
)
print('Uploaded')

# Remove local folder when uploaded
shutil.rmtree(src_project_path)
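
A hedged usage sketch of the helper above (the dataset folder path is illustrative):

```python
# Hypothetical caller: upload a locally prepared dataset folder to Roboflow.
from connections.roboflow_helper import RoboflowHelper

rb = RoboflowHelper()                         # logs in with the Roboflow credentials from the environment
rb.upload_dataset("/ml/data/output/dataset")  # uploads the dataset, then removes the local folder
```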