Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support grayf32le and gbrapf32le in numpy conversion. #1712

Merged
merged 2 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Features

- Add hardware decoding by :gh-user:`matthewlai` and :gh-user:`WyattBlue` in (:pr:`1685`).
- Add ``VideoFrame.rotation`` by :gh-user:`lgeiger` in (:pr:`1675`).
- Support grayf32le and gbrapf32le in numpy conversion by :gh-user:`robinechuca` in (:pr:`1712`).


v14.0.1
Expand Down
63 changes: 47 additions & 16 deletions av/video/frame.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ cdef class VideoFrame(Frame):
.. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned,
with the palette being in ARGB (PyAV will swap bytes if needed).

.. note:: For ``gbrp`` formats, channels are flipped to RGB order.

"""
cdef VideoFrame frame = self.reformat(**kwargs)

Expand All @@ -312,29 +314,36 @@ cdef class VideoFrame(Frame):
return np.hstack((
useful_array(frame.planes[0]),
useful_array(frame.planes[1]),
useful_array(frame.planes[2])
useful_array(frame.planes[2]),
)).reshape(-1, frame.height, frame.width)
elif frame.format.name == "yuyv422":
assert frame.width % 2 == 0
assert frame.height % 2 == 0
return useful_array(frame.planes[0], 2).reshape(frame.height, frame.width, -1)
elif frame.format.name == "gbrp":
array = np.empty((frame.height, frame.width, 3), dtype="uint8")
array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(-1, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(-1, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(-1, frame.width)
array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(frame.height, frame.width)
return array
elif frame.format.name in ("gbrp10be", "gbrp12be", "gbrp14be", "gbrp16be", "gbrp10le", "gbrp12le", "gbrp14le", "gbrp16le"):
array = np.empty((frame.height, frame.width, 3), dtype="uint16")
array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(-1, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(-1, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(-1, frame.width)
array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(frame.height, frame.width)
return byteswap_array(array, frame.format.name.endswith("be"))
elif frame.format.name in ("gbrpf32be", "gbrpf32le"):
array = np.empty((frame.height, frame.width, 3), dtype="float32")
array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(-1, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(-1, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(-1, frame.width)
array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width)
return byteswap_array(array, frame.format.name.endswith("be"))
elif frame.format.name in ("gbrapf32be", "gbrapf32le"):
array = np.empty((frame.height, frame.width, 4), dtype="float32")
array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 3] = useful_array(frame.planes[3], 4, "float32").reshape(frame.height, frame.width)
return byteswap_array(array, frame.format.name.endswith("be"))
elif frame.format.name in ("rgb24", "bgr24"):
return useful_array(frame.planes[0], 3).reshape(frame.height, frame.width, -1)
Expand All @@ -345,17 +354,22 @@ cdef class VideoFrame(Frame):
elif frame.format.name in ("gray16be", "gray16le"):
return byteswap_array(
useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width),
frame.format.name == "gray16be",
frame.format.name.endswith("be"),
)
elif frame.format.name in ("grayf32be", "grayf32le"):
return byteswap_array(
useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width),
frame.format.name.endswith("be"),
)
elif frame.format.name in ("rgb48be", "rgb48le"):
return byteswap_array(
useful_array(frame.planes[0], 6, "uint16").reshape(frame.height, frame.width, -1),
frame.format.name == "rgb48be",
frame.format.name.endswith("be"),
)
elif frame.format.name in ("rgba64be", "rgba64le"):
return byteswap_array(
useful_array(frame.planes[0], 8, "uint16").reshape(frame.height, frame.width, -1),
frame.format.name == "rgba64be",
frame.format.name.endswith("be"),
)
elif frame.format.name == "pal8":
image = useful_array(frame.planes[0]).reshape(frame.height, frame.width)
Expand Down Expand Up @@ -491,6 +505,8 @@ cdef class VideoFrame(Frame):
must be in the system's native byte order.

.. note:: for ``pal8``, an ``(image, palette)`` pair must be passed. `palette` must have shape (256, 4) and is given in ARGB format (PyAV will swap bytes if needed).

.. note:: for ``gbrp`` formats, channels are assumed to be given in RGB order.
"""
if format == "pal8":
array, palette = array
Expand Down Expand Up @@ -568,19 +584,34 @@ cdef class VideoFrame(Frame):
elif format in ("gray16be", "gray16le"):
check_ndarray(array, "uint16", 2)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format == "gray16be"), frame.planes[0], 2)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 2)
return frame
elif format in ("grayf32be", "grayf32le"):
check_ndarray(array, "float32", 2)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 4)
return frame
elif format in ("rgb48be", "rgb48le"):
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 3)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format == "rgb48be"), frame.planes[0], 6)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 6)
return frame
elif format in ("rgba64be", "rgba64le"):
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 4)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format == "rgba64be"), frame.planes[0], 8)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 8)
return frame
elif format in ("gbrapf32be", "gbrapf32le"):
check_ndarray(array, "float32", 3)
check_ndarray_shape(array, array.shape[2] == 4)

frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 4)
copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 4)
copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 4)
copy_array_to_plane(byteswap_array(array[:, :, 3], format.endswith("be")), frame.planes[3], 4)
return frame
elif format == "nv12":
check_ndarray(array, "uint8", 2)
Expand Down
36 changes: 36 additions & 0 deletions tests/test_videoframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,24 @@ def test_ndarray_gray_align() -> None:
assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_grayf32() -> None:
    """Round-trip a random float32 2-D array through both grayf32 byte orders.

    For each of ``grayf32be`` and ``grayf32le`` the array is converted to a
    ``VideoFrame`` and back, checking the frame size, the reported pixel
    format name, and that the recovered array equals the input.
    """
    height, width = 480, 640
    pixels = numpy.random.random_sample(size=(height, width)).astype(numpy.float32)
    for pix_fmt in ("grayf32be", "grayf32le"):
        frame = VideoFrame.from_ndarray(pixels, format=pix_fmt)
        assert frame.width == width and frame.height == height
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), pixels)


def test_ndarray_grayf32_align() -> None:
    """Round-trip a 238x318 float32 array through both grayf32 byte orders.

    Same checks as the 480x640 variant, on a smaller frame.
    NOTE(review): the 318-pixel width is presumably chosen so that plane rows
    need padding -- confirm against the plane line-size handling.
    """
    height, width = 238, 318
    pixels = numpy.random.random_sample(size=(height, width)).astype(numpy.float32)
    for pix_fmt in ("grayf32be", "grayf32le"):
        frame = VideoFrame.from_ndarray(pixels, format=pix_fmt)
        assert frame.width == width and frame.height == height
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), pixels)


def test_ndarray_rgb() -> None:
array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8)
for format in ("rgb24", "bgr24"):
Expand Down Expand Up @@ -365,6 +383,24 @@ def test_ndarray_gbrpf32_align() -> None:
assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_gbrapf32() -> None:
    """Round-trip a random float32 4-channel array through both gbrapf32 byte orders.

    For each of ``gbrapf32be`` and ``gbrapf32le`` the (480, 640, 4) array is
    converted to a ``VideoFrame`` and back, checking the frame size, the
    reported pixel format name, and that the recovered array equals the input.
    """
    height, width, channels = 480, 640, 4
    pixels = numpy.random.random_sample(size=(height, width, channels)).astype(
        numpy.float32
    )
    for pix_fmt in ("gbrapf32be", "gbrapf32le"):
        frame = VideoFrame.from_ndarray(pixels, format=pix_fmt)
        assert frame.width == width and frame.height == height
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), pixels)


def test_ndarray_gbrapf32_allign() -> None:
    """Round-trip a 238x318 float32 4-channel array through both gbrapf32 byte orders.

    Same checks as the 480x640 variant, on a smaller frame.
    NOTE(review): the 318-pixel width is presumably chosen so that plane rows
    need padding -- confirm against the plane line-size handling.
    NOTE(review): the function name spells "align" as "allign", unlike the
    sibling ``*_align`` tests; kept as-is here to avoid renaming the test.
    """
    height, width, channels = 238, 318, 4
    pixels = numpy.random.random_sample(size=(height, width, channels)).astype(
        numpy.float32
    )
    for pix_fmt in ("gbrapf32be", "gbrapf32le"):
        frame = VideoFrame.from_ndarray(pixels, format=pix_fmt)
        assert frame.width == width and frame.height == height
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), pixels)


def test_ndarray_yuv420p() -> None:
array = numpy.random.randint(0, 256, size=(720, 640), dtype=numpy.uint8)
frame = VideoFrame.from_ndarray(array, format="yuv420p")
Expand Down
Loading