From ce408a12827b15743192e90f50c8cb88ceb0906f Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Wed, 9 Oct 2024 15:34:15 +0200 Subject: [PATCH 1/3] Adding opencv dependency to qwenvl2 --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 6 ++++++ node-hub/dora-qwenvl/pyproject.toml | 1 + .../llama_factory_recorder/main.py | 10 +++++++++- node-hub/llama-factory-recorder/pyproject.toml | 1 + 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index e03b31c1..e2d16e8f 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -5,6 +5,7 @@ import numpy as np import pyarrow as pa from PIL import Image +import cv2 DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct" CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH) @@ -127,6 +128,11 @@ def main(): frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) elif encoding == "rgb8": pass + elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: + channels = 3 + storage_type = np.uint8 + storage = storage.to_numpy() + frame = cv2.imdecode(storage, cv2.IMREAD_COLOR) else: raise RuntimeError(f"Unsupported image encoding: {encoding}") frames[event_id] = Image.fromarray(frame) diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml index e302b10a..eac55946 100644 --- a/node-hub/dora-qwenvl/pyproject.toml +++ b/node-hub/dora-qwenvl/pyproject.toml @@ -19,6 +19,7 @@ torchvision = "^0.19" transformers = "^4.45" qwen-vl-utils = "^0.0.2" accelerate = "^0.33" +opencv-python = ">= 4.1.1" # flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py index e9a26c64..ba645e8f 100644 --- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py @@ -5,6 +5,7 @@ import pyarrow as pa from PIL import Image from pathlib import Path +import cv2 DEFAULT_QUESTION = os.getenv( "DEFAULT_QUESTION", @@ -155,12 +156,19 @@ def main(): .reshape((height, width, channels)) ) if encoding == "bgr8": - frames[event_id] = frame[:, :, ::-1] # OpenCV image (BGR to RGB) + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) elif encoding == "rgb8": pass + elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: + channels = 3 + storage_type = np.uint8 + storage = storage.to_numpy() + frame = cv2.imdecode(storage, cv2.IMREAD_COLOR) else: raise RuntimeError(f"Unsupported image encoding: {encoding}") + frames[event_id] = frame + elif event_id == "text": text = event["value"][0].as_py() if text != "": diff --git a/node-hub/llama-factory-recorder/pyproject.toml b/node-hub/llama-factory-recorder/pyproject.toml index 34a55c0a..75f15bc0 100644 --- a/node-hub/llama-factory-recorder/pyproject.toml +++ b/node-hub/llama-factory-recorder/pyproject.toml @@ -14,6 +14,7 @@ packages = [{ include = "llama_factory_recorder" }] python = "^3.7" dora-rs = "^0.3.6" pillow = "^10.4.0" +opencv-python = ">= 4.1.1" [tool.poetry.scripts] llama-factory-recorder = "llama_factory_recorder.main:main" From 254e74d4accb5cff13a29d1d9d01514863679842 Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Thu, 10 Oct 2024 05:31:38 +0200 Subject: [PATCH 2/3] Skip image that cannot be encoded --- node-hub/opencv-video-capture/opencv_video_capture/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py index 71bc5ede..2e09b815 100644 --- a/node-hub/opencv-video-capture/opencv_video_capture/main.py +++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py @@ -120,7 +120,10 @@ def main(): if encoding == "rgb8": frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: - frame = cv2.imencode("." + encoding, frame)[1] + ret, frame = cv2.imencode("." + encoding, frame) + if not ret: + print("Could not encode image...") + continue storage = pa.array(frame.ravel()) From 4dcf2cf572d263700fb7dda96d808c46822b7704 Mon Sep 17 00:00:00 2001 From: LyonRust Date: Thu, 10 Oct 2024 19:00:46 +0800 Subject: [PATCH 3/3] Fix small bug for jpeg encoding --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 22 ++++++++++++------- .../llama_factory_recorder/main.py | 20 ++++++++++++----- .../opencv_video_capture/main.py | 19 +++++++++++----- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index e2d16e8f..c6c735b5 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -116,23 +116,29 @@ def main(): elif encoding == "rgb8": channels = 3 storage_type = np.uint8 + elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: + channels = 3 + storage_type = np.uint8 else: raise RuntimeError(f"Unsupported image encoding: {encoding}") - frame = ( - storage.to_numpy() - .astype(storage_type) - .reshape((height, width, channels)) - ) if encoding == "bgr8": + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) elif encoding == "rgb8": - pass + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: - channels = 3 - storage_type = np.uint8 storage = storage.to_numpy() frame = cv2.imdecode(storage, cv2.IMREAD_COLOR) + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) else: raise RuntimeError(f"Unsupported image encoding: {encoding}") frames[event_id] = Image.fromarray(frame) diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py index ba645e8f..c079cde9 100644 --- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py @@ -147,23 +147,31 @@ def main(): elif encoding == "rgb8": channels = 3 storage_type = np.uint8 + elif encoding == "jpeg": + channels = 3 + storage_type = np.uint8 else: raise RuntimeError(f"Unsupported image encoding: {encoding}") - frame = ( - storage.to_numpy() - .astype(storage_type) - .reshape((height, width, channels)) - ) if encoding == "bgr8": + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) elif encoding == "rgb8": - pass + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: channels = 3 storage_type = np.uint8 storage = storage.to_numpy() frame = cv2.imdecode(storage, cv2.IMREAD_COLOR) + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) else: raise RuntimeError(f"Unsupported image encoding: {encoding}") diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py index 2e09b815..7042f29a 100644 --- a/node-hub/opencv-video-capture/opencv_video_capture/main.py +++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py @@ -10,6 +10,8 @@ RUNNER_CI = True if os.getenv("CI") == "true" else False +FLIP = os.getenv("FLIP", "") + def main(): # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables. @@ -101,10 +103,12 @@ def main(): 1, ) - metadata = event["metadata"] - metadata["encoding"] = encoding - metadata["width"] = int(frame.shape[1]) - metadata["height"] = int(frame.shape[0]) + if FLIP == "VERTICAL": + frame = cv2.flip(frame, 0) + elif FLIP == "HORIZONTAL": + frame = cv2.flip(frame, 1) + elif FLIP == "BOTH": + frame = cv2.flip(frame, -1) # resize the frame if ( @@ -116,13 +120,18 @@ def main(): ): frame = cv2.resize(frame, (image_width, image_height)) + metadata = event["metadata"] + metadata["encoding"] = encoding + metadata["width"] = int(frame.shape[1]) + metadata["height"] = int(frame.shape[0]) + # Get the right encoding if encoding == "rgb8": frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: ret, frame = cv2.imencode("." + encoding, frame) if not ret: - print("Could not encode image...") + print("Error encoding image...") continue storage = pa.array(frame.ravel())