Skip to content

Commit

Permalink
Add jpeg format to qwenvl2 (#684)
Browse files Browse the repository at this point in the history
Small PR to enable JPEG-encoded image communication when using qwenvl2.
  • Loading branch information
haixuanTao authored Oct 10, 2024
2 parents e2b469b + 4dcf2cf commit 2d8b9ee
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 18 deletions.
24 changes: 18 additions & 6 deletions node-hub/dora-qwenvl/dora_qwenvl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pyarrow as pa
from PIL import Image
import cv2

DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct"
CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH)
Expand Down Expand Up @@ -115,18 +116,29 @@ def main():
elif encoding == "rgb8":
channels = 3
storage_type = np.uint8
elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
channels = 3
storage_type = np.uint8
else:
raise RuntimeError(f"Unsupported image encoding: {encoding}")

frame = (
storage.to_numpy()
.astype(storage_type)
.reshape((height, width, channels))
)
if encoding == "bgr8":
frame = (
storage.to_numpy()
.astype(storage_type)
.reshape((height, width, channels))
)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
elif encoding == "rgb8":
pass
frame = (
storage.to_numpy()
.astype(storage_type)
.reshape((height, width, channels))
)
elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
storage = storage.to_numpy()
frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
else:
raise RuntimeError(f"Unsupported image encoding: {encoding}")
frames[event_id] = Image.fromarray(frame)
Expand Down
1 change: 1 addition & 0 deletions node-hub/dora-qwenvl/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ torchvision = "^0.19"
transformers = "^4.45"
qwen-vl-utils = "^0.0.2"
accelerate = "^0.33"
opencv-python = ">= 4.1.1"
# flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation


Expand Down
30 changes: 23 additions & 7 deletions node-hub/llama-factory-recorder/llama_factory_recorder/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pyarrow as pa
from PIL import Image
from pathlib import Path
import cv2

DEFAULT_QUESTION = os.getenv(
"DEFAULT_QUESTION",
Expand Down Expand Up @@ -146,21 +147,36 @@ def main():
elif encoding == "rgb8":
channels = 3
storage_type = np.uint8
elif encoding == "jpeg":
channels = 3
storage_type = np.uint8
else:
raise RuntimeError(f"Unsupported image encoding: {encoding}")

frame = (
storage.to_numpy()
.astype(storage_type)
.reshape((height, width, channels))
)
if encoding == "bgr8":
frames[event_id] = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
frame = (
storage.to_numpy()
.astype(storage_type)
.reshape((height, width, channels))
)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
elif encoding == "rgb8":
pass
frame = (
storage.to_numpy()
.astype(storage_type)
.reshape((height, width, channels))
)
elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
channels = 3
storage_type = np.uint8
storage = storage.to_numpy()
frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
else:
raise RuntimeError(f"Unsupported image encoding: {encoding}")

frames[event_id] = frame

elif event_id == "text":
text = event["value"][0].as_py()
if text != "":
Expand Down
1 change: 1 addition & 0 deletions node-hub/llama-factory-recorder/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ packages = [{ include = "llama_factory_recorder" }]
python = "^3.7"
dora-rs = "^0.3.6"
pillow = "^10.4.0"
opencv-python = ">= 4.1.1"

[tool.poetry.scripts]
llama-factory-recorder = "llama_factory_recorder.main:main"
Expand Down
22 changes: 17 additions & 5 deletions node-hub/opencv-video-capture/opencv_video_capture/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

RUNNER_CI = True if os.getenv("CI") == "true" else False

FLIP = os.getenv("FLIP", "")


def main():
# Handle dynamic nodes: ask for the name of the node in the dataflow and for the same values as the ENV variables.
Expand Down Expand Up @@ -101,10 +103,12 @@ def main():
1,
)

metadata = event["metadata"]
metadata["encoding"] = encoding
metadata["width"] = int(frame.shape[1])
metadata["height"] = int(frame.shape[0])
if FLIP == "VERTICAL":
frame = cv2.flip(frame, 0)
elif FLIP == "HORIZONTAL":
frame = cv2.flip(frame, 1)
elif FLIP == "BOTH":
frame = cv2.flip(frame, -1)

# resize the frame
if (
Expand All @@ -116,11 +120,19 @@ def main():
):
frame = cv2.resize(frame, (image_width, image_height))

metadata = event["metadata"]
metadata["encoding"] = encoding
metadata["width"] = int(frame.shape[1])
metadata["height"] = int(frame.shape[0])

# Get the right encoding
if encoding == "rgb8":
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
frame = cv2.imencode("." + encoding, frame)[1]
ret, frame = cv2.imencode("." + encoding, frame)
if not ret:
print("Error encoding image...")
continue

storage = pa.array(frame.ravel())

Expand Down

0 comments on commit 2d8b9ee

Please sign in to comment.