diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index e03b31c1..c6c735b5 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -5,6 +5,7 @@ import numpy as np import pyarrow as pa from PIL import Image +import cv2 DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct" CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH) @@ -115,18 +116,29 @@ def main(): elif encoding == "rgb8": channels = 3 storage_type = np.uint8 + elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: + channels = 3 + storage_type = np.uint8 else: raise RuntimeError(f"Unsupported image encoding: {encoding}") - frame = ( - storage.to_numpy() - .astype(storage_type) - .reshape((height, width, channels)) - ) if encoding == "bgr8": + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) elif encoding == "rgb8": - pass + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) + elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: + storage = storage.to_numpy() + frame = cv2.imdecode(storage, cv2.IMREAD_COLOR) + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) else: raise RuntimeError(f"Unsupported image encoding: {encoding}") frames[event_id] = Image.fromarray(frame) diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml index e302b10a..eac55946 100644 --- a/node-hub/dora-qwenvl/pyproject.toml +++ b/node-hub/dora-qwenvl/pyproject.toml @@ -19,6 +19,7 @@ torchvision = "^0.19" transformers = "^4.45" qwen-vl-utils = "^0.0.2" accelerate = "^0.33" +opencv-python = ">= 4.1.1" # flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py index e9a26c64..c079cde9 100644 --- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py @@ -5,6 +5,7 @@ import pyarrow as pa from PIL import Image from pathlib import Path +import cv2 DEFAULT_QUESTION = os.getenv( "DEFAULT_QUESTION", @@ -146,21 +147,36 @@ def main(): elif encoding == "rgb8": channels = 3 storage_type = np.uint8 + elif encoding == "jpeg": + channels = 3 + storage_type = np.uint8 else: raise RuntimeError(f"Unsupported image encoding: {encoding}") - frame = ( - storage.to_numpy() - .astype(storage_type) - .reshape((height, width, channels)) - ) if encoding == "bgr8": - frames[event_id] = frame[:, :, ::-1] # OpenCV image (BGR to RGB) + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) elif encoding == "rgb8": - pass + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) + elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: + channels = 3 + storage_type = np.uint8 + storage = storage.to_numpy() + frame = cv2.imdecode(storage, cv2.IMREAD_COLOR) + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) else: raise RuntimeError(f"Unsupported image encoding: {encoding}") + frames[event_id] = frame + elif event_id == "text": text = event["value"][0].as_py() if text != "": diff --git a/node-hub/llama-factory-recorder/pyproject.toml b/node-hub/llama-factory-recorder/pyproject.toml index 34a55c0a..75f15bc0 100644 --- a/node-hub/llama-factory-recorder/pyproject.toml +++ b/node-hub/llama-factory-recorder/pyproject.toml @@ -14,6 +14,7 @@ packages = [{ include = "llama_factory_recorder" }] python = "^3.7" dora-rs = "^0.3.6" pillow = "^10.4.0" +opencv-python = ">= 4.1.1" [tool.poetry.scripts] llama-factory-recorder = "llama_factory_recorder.main:main" diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py index 71bc5ede..7042f29a 100644 --- a/node-hub/opencv-video-capture/opencv_video_capture/main.py +++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py @@ -10,6 +10,8 @@ RUNNER_CI = True if os.getenv("CI") == "true" else False +FLIP = os.getenv("FLIP", "") + def main(): # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables. @@ -101,10 +103,12 @@ def main(): 1, ) - metadata = event["metadata"] - metadata["encoding"] = encoding - metadata["width"] = int(frame.shape[1]) - metadata["height"] = int(frame.shape[0]) + if FLIP == "VERTICAL": + frame = cv2.flip(frame, 0) + elif FLIP == "HORIZONTAL": + frame = cv2.flip(frame, 1) + elif FLIP == "BOTH": + frame = cv2.flip(frame, -1) # resize the frame if ( @@ -116,11 +120,19 @@ def main(): ): frame = cv2.resize(frame, (image_width, image_height)) + metadata = event["metadata"] + metadata["encoding"] = encoding + metadata["width"] = int(frame.shape[1]) + metadata["height"] = int(frame.shape[0]) + # Get the right encoding if encoding == "rgb8": frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]: - frame = cv2.imencode("." + encoding, frame)[1] + ret, frame = cv2.imencode("." + encoding, frame) + if not ret: + print("Error encoding image...") + continue storage = pa.array(frame.ravel())