dora-rs · haixuanTao · Oct 10, 2024 · Oct 9, 2024 · Oct 10, 2024 · Oct 10, 2024
diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pyarrow as pa
 from PIL import Image
+import cv2
 
 DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct"
 CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH)
@@ -115,18 +116,32 @@ def main():
  elif encoding == "rgb8":
  channels = 3
  storage_type = np.uint8
+ elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+ channels = 3
+ storage_type = np.uint8
  else:
  raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
- frame = (
- storage.to_numpy()
- .astype(storage_type)
- .reshape((height, width, channels))
- )
  if encoding == "bgr8":
+ frame = (
+ storage.to_numpy()
+ .astype(storage_type)
+ .reshape((height, width, channels))
+ )
  frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
  elif encoding == "rgb8":
- pass
+ frame = (
+ storage.to_numpy()
+ .astype(storage_type)
+ .reshape((height, width, channels))
+ )
+ elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+ storage = storage.to_numpy()
+ frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
+ # cv2.imdecode returns None for a corrupt or truncated buffer; fail with a clear error instead of a TypeError on the slice below.
+ if frame is None:
+ raise RuntimeError(f"Failed to decode {encoding} image")
+ frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
  else:
  raise RuntimeError(f"Unsupported image encoding: {encoding}")
  frames[event_id] = Image.fromarray(frame)

diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml
@@ -19,6 +19,7 @@ torchvision = "^0.19"
 transformers = "^4.45"
 qwen-vl-utils = "^0.0.2"
 accelerate = "^0.33"
+opencv-python = ">= 4.1.1"
 # flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation
 
 

diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
@@ -5,6 +5,7 @@
 import pyarrow as pa
 from PIL import Image
 from pathlib import Path
+import cv2
 
 DEFAULT_QUESTION = os.getenv(
  "DEFAULT_QUESTION",
@@ -146,21 +147,37 @@ def main():
  elif encoding == "rgb8":
  channels = 3
  storage_type = np.uint8
+ elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+ channels = 3
+ storage_type = np.uint8
  else:
  raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
- frame = (
- storage.to_numpy()
- .astype(storage_type)
- .reshape((height, width, channels))
- )
  if encoding == "bgr8":
- frames[event_id] = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
+ frame = (
+ storage.to_numpy()
+ .astype(storage_type)
+ .reshape((height, width, channels))
+ )
+ frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
  elif encoding == "rgb8":
- pass
+ frame = (
+ storage.to_numpy()
+ .astype(storage_type)
+ .reshape((height, width, channels))
+ )
+ elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+ storage = storage.to_numpy()
+ frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
+ # cv2.imdecode returns None for a corrupt or truncated buffer; fail with a clear error instead of a TypeError on the slice below.
+ if frame is None:
+ raise RuntimeError(f"Failed to decode {encoding} image")
+ frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
  else:
  raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
+ frames[event_id] = frame
+
  elif event_id == "text":
  text = event["value"][0].as_py()
  if text != "":

diff --git a/node-hub/llama-factory-recorder/pyproject.toml b/node-hub/llama-factory-recorder/pyproject.toml
@@ -14,6 +14,7 @@ packages = [{ include = "llama_factory_recorder" }]
 python = "^3.7"
 dora-rs = "^0.3.6"
 pillow = "^10.4.0"
+opencv-python = ">= 4.1.1"
 
 [tool.poetry.scripts]
 llama-factory-recorder = "llama_factory_recorder.main:main"

diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py
@@ -10,6 +10,8 @@
 
 RUNNER_CI = True if os.getenv("CI") == "true" else False
 
+FLIP = os.getenv("FLIP", "")
+
 
 def main():
  # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables.
@@ -101,10 +103,12 @@ def main():
  1,
  )
 
- metadata = event["metadata"]
- metadata["encoding"] = encoding
- metadata["width"] = int(frame.shape[1])
- metadata["height"] = int(frame.shape[0])
+ # Mirror the frame when the FLIP environment variable requests it;
+ # any other value leaves the frame untouched.
+ flip_codes = {"VERTICAL": 0, "HORIZONTAL": 1, "BOTH": -1}
+ flip_code = flip_codes.get(FLIP)
+ if flip_code is not None:
+ frame = cv2.flip(frame, flip_code)
 
  # resize the frame
  if (
@@ -116,11 +120,19 @@ def main():
  ):
  frame = cv2.resize(frame, (image_width, image_height))
 
+ metadata = event["metadata"]
+ metadata["encoding"] = encoding
+ metadata["width"] = int(frame.shape[1])
+ metadata["height"] = int(frame.shape[0])
+
  # Get the right encoding
  if encoding == "rgb8":
  frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
  elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
- frame = cv2.imencode("." + encoding, frame)[1]
+ ret, frame = cv2.imencode("." + encoding, frame)
+ if not ret:
+ print("Error encoding image...")
+ continue
 
  storage = pa.array(frame.ravel())