From ce408a12827b15743192e90f50c8cb88ceb0906f Mon Sep 17 00:00:00 2001
From: haixuanTao <tao.xavier@outlook.com>
Date: Wed, 9 Oct 2024 15:34:15 +0200
Subject: [PATCH 1/3] Adding opencv dependency to qwenvl2

---
 node-hub/dora-qwenvl/dora_qwenvl/main.py               |  6 ++++++
 node-hub/dora-qwenvl/pyproject.toml                    |  1 +
 .../llama_factory_recorder/main.py                     | 10 +++++++++-
 node-hub/llama-factory-recorder/pyproject.toml         |  1 +
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py
index e03b31c1..e2d16e8f 100644
--- a/node-hub/dora-qwenvl/dora_qwenvl/main.py
+++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pyarrow as pa
 from PIL import Image
+import cv2
 
 DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct"
 CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH)
@@ -127,6 +128,11 @@ def main():
                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
                 elif encoding == "rgb8":
                     pass
+                elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+                    channels = 3
+                    storage_type = np.uint8
+                    storage = storage.to_numpy()
+                    frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
                 frames[event_id] = Image.fromarray(frame)
diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml
index e302b10a..eac55946 100644
--- a/node-hub/dora-qwenvl/pyproject.toml
+++ b/node-hub/dora-qwenvl/pyproject.toml
@@ -19,6 +19,7 @@ torchvision = "^0.19"
 transformers = "^4.45"
 qwen-vl-utils = "^0.0.2"
 accelerate = "^0.33"
+opencv-python = ">= 4.1.1"
 # flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation
 
 
diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
index e9a26c64..ba645e8f 100644
--- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
+++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
@@ -5,6 +5,7 @@
 import pyarrow as pa
 from PIL import Image
 from pathlib import Path
+import cv2
 
 DEFAULT_QUESTION = os.getenv(
     "DEFAULT_QUESTION",
@@ -155,12 +156,19 @@ def main():
                     .reshape((height, width, channels))
                 )
                 if encoding == "bgr8":
-                    frames[event_id] = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                    frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
                 elif encoding == "rgb8":
                     pass
+                elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+                    channels = 3
+                    storage_type = np.uint8
+                    storage = storage.to_numpy()
+                    frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
+                frames[event_id] = frame
+
             elif event_id == "text":
                 text = event["value"][0].as_py()
                 if text != "":
diff --git a/node-hub/llama-factory-recorder/pyproject.toml b/node-hub/llama-factory-recorder/pyproject.toml
index 34a55c0a..75f15bc0 100644
--- a/node-hub/llama-factory-recorder/pyproject.toml
+++ b/node-hub/llama-factory-recorder/pyproject.toml
@@ -14,6 +14,7 @@ packages = [{ include = "llama_factory_recorder" }]
 python = "^3.7"
 dora-rs = "^0.3.6"
 pillow = "^10.4.0"
+opencv-python = ">= 4.1.1"
 
 [tool.poetry.scripts]
 llama-factory-recorder = "llama_factory_recorder.main:main"

From 254e74d4accb5cff13a29d1d9d01514863679842 Mon Sep 17 00:00:00 2001
From: haixuanTao <tao.xavier@outlook.com>
Date: Thu, 10 Oct 2024 05:31:38 +0200
Subject: [PATCH 2/3] Skip image that cannot be encoded

---
 node-hub/opencv-video-capture/opencv_video_capture/main.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py
index 71bc5ede..2e09b815 100644
--- a/node-hub/opencv-video-capture/opencv_video_capture/main.py
+++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py
@@ -120,7 +120,10 @@ def main():
                 if encoding == "rgb8":
                     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
-                    frame = cv2.imencode("." + encoding, frame)[1]
+                    ret, frame = cv2.imencode("." + encoding, frame)
+                    if not ret:
+                        print("Could not encode image...")
+                        continue
 
                 storage = pa.array(frame.ravel())
 

From 4dcf2cf572d263700fb7dda96d808c46822b7704 Mon Sep 17 00:00:00 2001
From: LyonRust <echo_ai@foxmail.com>
Date: Thu, 10 Oct 2024 19:00:46 +0800
Subject: [PATCH 3/3] Fix small bug for jpeg encoding

---
 node-hub/dora-qwenvl/dora_qwenvl/main.py      | 22 ++++++++++++-------
 .../llama_factory_recorder/main.py            | 20 ++++++++++++-----
 .../opencv_video_capture/main.py              | 19 +++++++++++-----
 3 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py
index e2d16e8f..c6c735b5 100644
--- a/node-hub/dora-qwenvl/dora_qwenvl/main.py
+++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py
@@ -116,23 +116,29 @@ def main():
                 elif encoding == "rgb8":
                     channels = 3
                     storage_type = np.uint8
+                elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+                    channels = 3
+                    storage_type = np.uint8
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
-                frame = (
-                    storage.to_numpy()
-                    .astype(storage_type)
-                    .reshape((height, width, channels))
-                )
                 if encoding == "bgr8":
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
                 elif encoding == "rgb8":
-                    pass
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
                 elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
-                    channels = 3
-                    storage_type = np.uint8
                     storage = storage.to_numpy()
                     frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
+                    frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
                 frames[event_id] = Image.fromarray(frame)
diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
index ba645e8f..c079cde9 100644
--- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
+++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
@@ -147,23 +147,31 @@ def main():
                 elif encoding == "rgb8":
                     channels = 3
                     storage_type = np.uint8
+                elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+                    channels = 3  # compressed frames are decoded with cv2.IMREAD_COLOR further down
+                    storage_type = np.uint8
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
-                frame = (
-                    storage.to_numpy()
-                    .astype(storage_type)
-                    .reshape((height, width, channels))
-                )
                 if encoding == "bgr8":
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
                 elif encoding == "rgb8":
-                    pass
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
                 elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
                     channels = 3
                     storage_type = np.uint8
                     storage = storage.to_numpy()
                     frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
+                    frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py
index 2e09b815..7042f29a 100644
--- a/node-hub/opencv-video-capture/opencv_video_capture/main.py
+++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py
@@ -10,6 +10,8 @@
 
 RUNNER_CI = True if os.getenv("CI") == "true" else False
 
+FLIP = os.getenv("FLIP", "")
+
 
 def main():
     # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables.
@@ -101,10 +103,12 @@ def main():
                         1,
                     )
 
-                metadata = event["metadata"]
-                metadata["encoding"] = encoding
-                metadata["width"] = int(frame.shape[1])
-                metadata["height"] = int(frame.shape[0])
+                if FLIP == "VERTICAL":
+                    frame = cv2.flip(frame, 0)
+                elif FLIP == "HORIZONTAL":
+                    frame = cv2.flip(frame, 1)
+                elif FLIP == "BOTH":
+                    frame = cv2.flip(frame, -1)
 
                 # resize the frame
                 if (
@@ -116,13 +120,18 @@ def main():
                 ):
                     frame = cv2.resize(frame, (image_width, image_height))
 
+                metadata = event["metadata"]
+                metadata["encoding"] = encoding
+                metadata["width"] = int(frame.shape[1])
+                metadata["height"] = int(frame.shape[0])
+
                 # Get the right encoding
                 if encoding == "rgb8":
                     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
                     ret, frame = cv2.imencode("." + encoding, frame)
                     if not ret:
-                        print("Could not encode image...")
+                        print("Error encoding image...")
                         continue
 
                 storage = pa.array(frame.ravel())