Adding opencv dependency to qwenvl2

dora-rs · Oct 9, 2024 · ce408a1
1 parent 3d6360d
commit ce408a1
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 1 deletion.
diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pyarrow as pa
 from PIL import Image
+import cv2
 
 DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct"
 CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH)
@@ -127,6 +128,11 @@ def main():
  frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
  elif encoding == "rgb8":
  pass
+ elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+ channels = 3
+ storage_type = np.uint8
+ storage = storage.to_numpy()
+ frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
  else:
  raise RuntimeError(f"Unsupported image encoding: {encoding}")
  frames[event_id] = Image.fromarray(frame)

diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml
@@ -19,6 +19,7 @@ torchvision = "^0.19"
 transformers = "^4.45"
 qwen-vl-utils = "^0.0.2"
 accelerate = "^0.33"
+opencv-python = ">= 4.1.1"
 # flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation
 
 

diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py
@@ -5,6 +5,7 @@
 import pyarrow as pa
 from PIL import Image
 from pathlib import Path
+import cv2
 
 DEFAULT_QUESTION = os.getenv(
  "DEFAULT_QUESTION",
@@ -155,12 +156,19 @@ def main():
  .reshape((height, width, channels))
  )
  if encoding == "bgr8":
- frames[event_id] = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
+ frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
  elif encoding == "rgb8":
  pass
+ elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+ channels = 3
+ storage_type = np.uint8
+ storage = storage.to_numpy()
+ frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
  else:
  raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
+ frames[event_id] = frame
+
  elif event_id == "text":
  text = event["value"][0].as_py()
  if text != "":

diff --git a/node-hub/llama-factory-recorder/pyproject.toml b/node-hub/llama-factory-recorder/pyproject.toml
@@ -14,6 +14,7 @@ packages = [{ include = "llama_factory_recorder" }]
 python = "^3.7"
 dora-rs = "^0.3.6"
 pillow = "^10.4.0"
+opencv-python = ">= 4.1.1"
 
 [tool.poetry.scripts]
 llama-factory-recorder = "llama_factory_recorder.main:main"