Utilize pixman for efficient pixel manipulation #55

Bennctu · 2024-10-01T18:50:02Z

As proposed in issue #6, I replaced the original image compositing and trapezoid rasterization with Pixman functions. In brief, the changes include:

pixman_image_composite: image compositing
pixman_fill_rect: uses pixman_image_fill_rectangles to fill the image with rectangles
pixman_image_fill_boxes: fills each span with boxes

Close #6

We compare the results of the original method with those rewritten using the Pixman library:
(Left image: original method, and right image: Pixman method)

Hello world:
Calculator(Original):
Viewer(Original):
Multi-demo(Original):

configs/Kconfig

jserv · 2024-10-02T00:19:40Z

Consider the performance test program below (apps/perf.c)

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

#include "twin.h"

#define TEST_PIX_WIDTH 1200
#define TEST_PIX_HEIGHT 800

static twin_pixmap_t *src32, *dst32;
static int twidth, theight, titers;

/*
 * Benchmark case: composite the file-scope src32 pixmap into dst32 using the
 * TWIN_SOURCE operator over a twidth x theight region.
 *
 * NOTE(review): the NULL argument is presumably the (absent) mask operand and
 * the zero pairs the destination/source/mask origins -- confirm against the
 * twin_composite() prototype in twin.h.
 */
static void test_argb32_source_argb32(void)
{
    twin_operand_t source_operand = {
        .source_kind = TWIN_PIXMAP,
        .u.pixmap = src32,
    };

    twin_composite(dst32, 0, 0, &source_operand, 0, 0, NULL, 0, 0,
                   TWIN_SOURCE, twidth, theight);
}

/*
 * Benchmark case: composite the file-scope src32 pixmap into dst32 using the
 * TWIN_OVER operator over a twidth x theight region.
 *
 * NOTE(review): the NULL argument is presumably the (absent) mask operand and
 * the zero pairs the destination/source/mask origins -- confirm against the
 * twin_composite() prototype in twin.h.
 */
static void test_argb32_over_argb32(void)
{
    twin_operand_t source_operand = {
        .source_kind = TWIN_PIXMAP,
        .u.pixmap = src32,
    };

    twin_composite(dst32, 0, 0, &source_operand, 0, 0, NULL, 0, 0, TWIN_OVER,
                   twidth, theight);
}

/*
 * Time `titers` back-to-back invocations of one benchmark case and print a
 * single result line of the form "<name><padding> <seconds> sec".
 *
 * name: label printed at the start of the line; padded with spaces up to
 *       column 40 (no padding when the label is already that long).
 * test: benchmark body; it is called titers times between the two
 *       gettimeofday() samples.
 *
 * The elapsed time is the wall-clock difference of the two samples, in
 * microseconds, reported in seconds.
 */
static void do_test(const char *name, void (*test)(void))
{
    enum { NAME_COLUMN_WIDTH = 40 };
    struct timeval start, end;

    /* Show the label before the run so a long case is visibly in progress. */
    printf("%s", name);
    fflush(stdout);

    gettimeofday(&start, NULL);
    for (int i = 0; i < titers; i++)
        test();
    gettimeofday(&end, NULL);

    unsigned long long start_us =
        (unsigned long long) start.tv_sec * 1000000ull + start.tv_usec;
    unsigned long long end_us =
        (unsigned long long) end.tv_sec * 1000000ull + end.tv_usec;

    /* Let printf generate the column padding instead of a scratch buffer. */
    size_t name_len = strlen(name);
    int pad = name_len < (size_t) NAME_COLUMN_WIDTH
                  ? (int) ((size_t) NAME_COLUMN_WIDTH - name_len)
                  : 0;

    /* double keeps microsecond resolution; float loses it past 2^24 us. */
    printf("%*s %f sec\n", pad, "", (double) (end_us - start_us) / 1000000.0);
}

#define DO_TEST(name) do_test(#name, test_##name)

/*
 * Run every benchmark case once for a single workload configuration.
 *
 * The case functions take no arguments, so the requested region size and
 * iteration count are published through the file-scope twidth, theight and
 * titers variables before the cases are dispatched via DO_TEST.
 */
static void do_tests(int width, int height, int iters)
{
    /* Publish the workload parameters read by the cases and by do_test(). */
    titers = iters;
    theight = height;
    twidth = width;

    DO_TEST(argb32_source_argb32);
    DO_TEST(argb32_over_argb32);
}

/*
 * Run the whole benchmark suite over a fixed list of workloads.
 *
 * title: tag printed in each "[ <title>: WxHxITERS ]" banner.
 *
 * Each workload is described once in the table below; the banner is
 * formatted from the same values that are handed to do_tests(), so the
 * printed label cannot drift from what is actually measured. A blank line is
 * printed after the last workload.
 */
static void do_all_tests(const char *title)
{
    static const struct {
        int width;
        int height;
        int iters;
    } workloads[] = {
        {10, 10, 1000000},
        {100, 100, 20000},
        {200, 200, 10000},
        {1200, 800, 200},
    };

    for (size_t i = 0; i < sizeof(workloads) / sizeof(workloads[0]); i++) {
        printf("[ %s: %dx%dx%d ]\n", title, workloads[i].width,
               workloads[i].height, workloads[i].iters);
        do_tests(workloads[i].width, workloads[i].height, workloads[i].iters);
    }

    printf("\n");
}

int main(void)
{
    /* Create some test pixmaps */
    src32 = twin_pixmap_from_file("assets/tux.png", TWIN_ARGB32);
    assert(src32);
    dst32 = twin_pixmap_create(TWIN_ARGB32, TEST_PIX_WIDTH, TEST_PIX_HEIGHT);
    assert(dst32);

    /* fill pixmaps */
    twin_fill(dst32, 0x80112233, TWIN_SOURCE, 0, 0, TEST_PIX_WIDTH,
              TEST_PIX_HEIGHT);

    /* pre-touch data */
    test_argb32_source_argb32();

    do_all_tests("Pixmap");

    return 0;
}

Enable it by turning off the existing demo programs. (You might integrate the above into apps/main.c though.)

--- a/Makefile
+++ b/Makefile
@@ -109,7 +109,7 @@ endif
 ifeq ($(CONFIG_DEMO_APPLICATIONS), y)
 target-y += demo-$(BACKEND)
 demo-$(BACKEND)_depends-y += $(target.a-y)
-demo-$(BACKEND)_files-y = apps/main.c
+demo-$(BACKEND)_files-y = apps/perf.c
 demo-$(BACKEND)_includes-y := include
 demo-$(BACKEND)_ldflags-y := \
        $(target.a-y) \

Reference test results with the builtin compositing:

[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.130208 sec
argb32_over_argb32                       0.273309 sec
[ Pixmap: 100x100x20000 ]
argb32_source_argb32                     0.025439 sec
argb32_over_argb32                       0.431355 sec
[ Pixmap: 200x200x10000 ]
argb32_source_argb32                     0.087813 sec
argb32_over_argb32                       0.839746 sec
[ Pixmap: 1200x800x200 ]
argb32_source_argb32                     0.022176 sec
argb32_over_argb32                       0.395362 sec

jserv · 2024-10-02T00:42:18Z

Promote Pixman option in Kconfig:

--- a/configs/Kconfig
+++ b/configs/Kconfig
@@ -16,6 +16,18 @@ config BACKEND_SDL
 
 endchoice
 
+choice
+    prompt "Renderer Selection"
+    default RENDERER_BUILTIN
+
+config RENDERER_BUILTIN
+    bool "Built-in pixel manipulation"
+
+config RENDERER_PIXMAN
+    bool "Pixman based rendering"
+
+endchoice
+
 menu "Features"
 
 config LOGGING
@@ -39,10 +51,6 @@ config CURSOR
     bool "Manipulate cursor"
     default n
 
-config PIXMAN
-    bool "Pixman to pixel manipulation"
-    default y
-
 endmenu
 
 menu "Image Loaders"

Once the RENDERER_PIXMAN option is enabled, the built-in pixel manipulation code should not be compiled, and you should ensure that all rendering is done through the Pixman path.

apps/main.c

src/widget.c

jserv · 2024-10-02T13:10:19Z

Pixman is a low-level library for pixel manipulation, including image compositing and trapezoid rasterization. It serves as the backend for Cairo's in-memory rendering surface and is a binary dependency of Cairo. Mado can be seen as a simplified version of Cairo, and the proposed change introduces an efficient rendering path as an optional feature for faster vector graphics. The original pixel manipulation code should be retained for resource-constrained environments, with the potential for integration into the Linux kernel as an in-kernel vector graphics module.

The proposed change adds a configurable option to switch between the built-in (or classical) renderer and the Pixman-based renderer. Applications using Mado should maintain source-level compatibility. The action items for this pull request include:

Utilize pixman_image_composite, pixman_fill_rect, pixman_image_fill_rectangles, etc., within Mado's internal implementation, ensuring no changes to the public API and maintaining source-level compatibility.
Develop a simple performance benchmark program to validate and measure the efficiency of pixel manipulations, enabling comparisons between the built-in and Pixman-based renderers. This task can be done in another pull request.
Ensure the conditional build system works as expected, generating the necessary renderer-specific code without unnecessary code bloat when configured.

Makefile

README.md

src/pixman.c

README.md

jserv · 2024-10-05T07:50:19Z

I am experiencing rendering accuracy issues when switching from the built-in renderer to the Pixman-based one.

The built-in renderer

The Pixman-based renderer

As shown in the diagrams above, the splines rendered using the Pixman-based path are not as accurate as those rendered with the built-in renderer.

jserv · 2024-10-05T09:04:59Z

Evaluate the feasibility to replace the primitive operations, such as _twin_rgb16_source_argb32, _twin_argb32_over_argb32, and _twin_argb32_source_argb32, with Pixman counterpart. Eventually, the file src/primitive.c can be omitted if Pixman-based render path is selected.

Makefile

Bennctu · 2024-10-06T14:09:28Z

#55 (comment)
This results from _span_fill in pixman.c: I have not added a new _span_fill implementation that uses Pixman functions.

jserv · 2024-10-06T14:12:04Z

src/pixmap.c

@@ -13,6 +13,10 @@ twin_pixmap_t *twin_pixmap_create(twin_format_t format,
                                  twin_coord_t height)
 {
    twin_coord_t stride = twin_bytes_per_pixel(format) * width;
+    /* Padding the stride to a multiple of 4 bytes */


Why do you think it is necessary? Explain and consider to submit another pull request for generic pixmap manipulation.

If we comment out src/pixmap.c#L17,L18, _pixman_log_error is triggered as follows:

*** BUG *** In create_bits_image_internal: The expression bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0 was false Set a breakpoint on '_pixman_log_error' to debug [1] 62492 segmentation fault (core dumped) ./demo-sdl

pixman_image_t *msk fails to be allocated in /src/pixman.c#L102. Finally, pixman_image_unref will access a nullptr, and segmentation fault occurs in /src/pixman.c#L107

Do not use screenshots for plain text content, as this is inaccessible to visually impaired users.

If we comment it, it will happen _pixman_log_error as follow:
...
pixman_image_t *msk fails to be allocated. Finally, pixman_image_unref will access a nullptr, and segmentation fault occurs.

Next, you should check the pointer alignment requirements for Pixman and consider providing the corresponding macros for alignment. See mimalloc for reference.

jserv · 2024-10-06T14:19:25Z

I don't add new _span_fill method which uses pixman function.

Show preliminary benchmark results for comparison purposes.

Bennctu · 2024-10-06T16:18:00Z

#55 (comment)

_span_fill with pixman function in /src/pixman.c

_span_fill with the builtin method in poly.c

(Notice that we test in the pixman configuration)

Makefile

jserv · 2024-10-06T22:01:25Z

src/pixman.c

+}
+
+/* Same function in draw.c */
+static twin_argb32_t _twin_apply_alpha(twin_argb32_t v)


See if the function can be accelerated by Pixman.

Observing the difference between using (left image) and not using (right image) _twin_apply_alpha, we know that _twin_apply_alpha performs the conversion from ABGR to ARGB.

According to the implementation in _twin_apply_alpha,

return alpha << 24 | twin_int_mult(twin_get_8(v, 0), alpha, t1) << 16 | twin_int_mult(twin_get_8(v, 8), alpha, t2) << 8 | twin_int_mult(twin_get_8(v, 16), alpha, t3) << 0;

it also multiplies the RGB channels by the alpha value.

However, the public API in pixman.h doesn't provide the functions about ABGR to ARGB conversion and alpha premultiplication for user.

we know that _twin_apply_alpha performs the conversion from ABGR to ARGB.

Check the inner function _convertBGRtoARGB in file src/image-png.c which was a workaround for macOS.

I ran mado with gdb. The function _convertBGRtoARGB isn't executed because my operating system is not macOS.

/src/pixman.c#L172 is executed because my computer is a little-endian machine. The PNG data is originally in RGBA format as the following image, but it becomes ABGR format on a little-endian machine. In /src/pixman.c#L172, mado has already performed the ABGR-to-ARGB conversion for the requirement of the pixmap's pixel data.

Bennctu · 2024-10-13T17:01:24Z

#55 (comment)

Evaluate the feasibility to replace the primitive operations, such as _twin_rgb16_source_argb32, _twin_argb32_over_argb32, and _twin_argb32_source_argb32, with Pixman counterpart. Eventually, the file src/primitive.c can be omitted if Pixman-based render path is selected.

According to pixman.h, the in or over operator don't exist in public API. Because _twin_rgb16_source_argb32, _twin_argb32_over_argb32, and _twin_argb32_source_argb32 are lower-level functions, we can only call pixman_image_composite (which can replace twin_composite in draw.c) to use in or over operation.

src/pixmap.c

jserv · 2024-10-15T13:25:18Z

According to pixman.h, the in or over operator don't exist in public API. Because _twin_rgb16_source_argb32, _twin_argb32_over_argb32, and _twin_argb32_source_argb32 are lower-level functions, we can only call pixman_image_composite (which can replace twin_composite in draw.c) to use in or over operation.

Can you provide preliminary benchmark results for evaluation purpose?

Bennctu · 2024-10-15T16:21:41Z

According to pixman.h, the in or over operator don't exist in public API. Because _twin_rgb16_source_argb32, _twin_argb32_over_argb32, and _twin_argb32_source_argb32 are lower-level functions, we can only call pixman_image_composite (which can replace twin_composite in draw.c) to use in or over operation.

Can you provide preliminary benchmark results for evaluation purpose?

The following evaluation results come from this test code:

# built-in method
[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.076773 sec
argb32_over_argb32                       0.130076 sec
[ Pixmap: 100x100x20000 ]
argb32_source_argb32                     0.065616 sec
argb32_over_argb32                       0.176866 sec
[ Pixmap: 200x200x10000 ]
argb32_source_argb32                     0.147732 sec
argb32_over_argb32                       0.352873 sec
[ Pixmap: 1200x800x200 ]
argb32_source_argb32                     0.058365 sec
argb32_over_argb32                       0.458145 sec

# pixman
[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.162651 sec
argb32_over_argb32                       0.149407 sec
[ Pixmap: 100x100x20000 ]
argb32_source_argb32                     0.020530 sec
argb32_over_argb32                       0.053927 sec
[ Pixmap: 200x200x10000 ]
argb32_source_argb32                     0.051592 sec
argb32_over_argb32                       0.132560 sec
[ Pixmap: 1200x800x200 ]
argb32_source_argb32                     0.041629 sec
argb32_over_argb32                       0.058690 sec

jserv · 2024-10-15T21:48:21Z

The following evaluation results come from this test code:

# built-in method
[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.076773 sec
argb32_over_argb32                       0.130076 sec
# pixman
[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.162651 sec
argb32_over_argb32                       0.149407 sec

Can you explain the test item 10x10x1000000?

Bennctu · 2024-10-21T13:37:10Z

The following evaluation results come from this test code:

# built-in method
[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.076773 sec
argb32_over_argb32                       0.130076 sec
# pixman
[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.162651 sec
argb32_over_argb32                       0.149407 sec

Can you explain the test item 10x10x1000000?

The following results show that it results from the allocation of pixman_image_t for src and dst in 'twin_composite' of pixman.c:

In small compositing size, the variation of the time consumption for pixman method is less than built-in method:

#Pixman
[ Pixmap: 7x7x1000000 ]
argb32_source_argb32                     0.169647 sec
argb32_over_argb32                       0.144932 sec
[ Pixmap: 9x9x1000000 ]
argb32_source_argb32                     0.151711 sec
argb32_over_argb32                       0.150564 sec
[ Pixmap: 11x11x1000000 ]
argb32_source_argb32                     0.169914 sec
argb32_over_argb32                       0.160988 sec
[ Pixmap: 13x13x1000000 ]
argb32_source_argb32                     0.172802 sec
argb32_over_argb32                       0.163671 sec
[ Pixmap: 15x15x1000000 ]
argb32_source_argb32                     0.194813 sec
argb32_over_argb32                       0.191646 sec
[ Pixmap: 17x17x1000000 ]
argb32_source_argb32                     0.183014 sec
argb32_over_argb32                       0.186015 sec
//////////////////////////////////////////////////////
#builtin
[ Pixmap: 7x7x1000000 ]
argb32_source_argb32                     0.054997 sec
argb32_over_argb32                       0.089470 sec
[ Pixmap: 9x9x1000000 ]
argb32_source_argb32                     0.070971 sec
argb32_over_argb32                       0.121374 sec
[ Pixmap: 11x11x1000000 ]
argb32_source_argb32                     0.088222 sec
argb32_over_argb32                       0.162219 sec
[ Pixmap: 13x13x1000000 ]
argb32_source_argb32                     0.112656 sec
argb32_over_argb32                       0.224784 sec
[ Pixmap: 15x15x1000000 ]
argb32_source_argb32                     0.130276 sec
argb32_over_argb32                       0.281983 sec
[ Pixmap: 17x17x1000000 ]
argb32_source_argb32                     0.161015 sec
argb32_over_argb32                       0.345266 sec

We find that the allocation of src and dst consumes 0.06 sec (with iters = 1000000) approximately if we comment on 'twin_composite' of pixman.c

[ Pixmap: 7x7x1000000 ]
argb32_source_argb32                     0.073393 sec
argb32_over_argb32                       0.067797 sec
[ Pixmap: 9x9x1000000 ]
argb32_source_argb32                     0.074549 sec
argb32_over_argb32                       0.062425 sec
[ Pixmap: 11x11x1000000 ]
argb32_source_argb32                     0.062554 sec
argb32_over_argb32                       0.063226 sec

For small compositing sizes, allocating src and dst accounts for a larger proportion of the total time. When twin_composite performs only pixman_image_composite, the result is close to the built-in method:

[ Pixmap: 10x10x1000000 ]
argb32_source_argb32                     0.076398 sec
argb32_over_argb32                       0.078906 sec
[ Pixmap: 100x100x20000 ]
argb32_source_argb32                     0.020046 sec
argb32_over_argb32                       0.054756 sec
[ Pixmap: 200x200x10000 ]
argb32_source_argb32                     0.050169 sec
argb32_over_argb32                       0.133911 sec
[ Pixmap: 1200x800x200 ]
argb32_source_argb32                     0.046361 sec
argb32_over_argb32                       0.059155 sec

Generally, the results with pixman method are better than the built-in method.

jserv · 2024-10-23T07:25:27Z

Generally, the results with pixman method are better than the built-in method.

We should integrate the performance suite before merging the proposed Pixman-based renderer to analyze its performance behavior and identify any regressions. Please submit a separate pull request for that purpose.

jserv · 2024-11-03T20:28:45Z

I plan to merge this pull request for further performance evaluation. Meanwhile, I would create new issue(s) to address pixman specific work. Some of them would be as following:

Evaluate the feasibility to replace the primitive operations, such as _twin_rgb16_source_argb32, _twin_argb32_over_argb32, and _twin_argb32_source_argb32, with Pixman counterpart.
the file src/primitive.c can be omitted if Pixman-based render path is selected.
Benchmarking! Locate the performance hotspot.

jserv

Rebase the latest main branch, resolve conflicts, and highlight the switch between the built-in renderer and the Pixman-based implementation. For RISC-V platforms without vector or SIMD extensions, the built-in renderer offers simplicity and works well. On modern CPU architectures with rich ISA extensions, Pixman provides faster pixel manipulation, making the switch to the Pixman-based implementation advantageous.

Bennctu · 2024-11-04T14:50:49Z

Rebase the latest main branch, resolve conflicts, and highlight the switch between the built-in renderer and the Pixman-based implementation. For RISC-V platforms without vector or SIMD extensions, the built-in renderer offers simplicity and works well. On modern CPU architectures with rich ISA extensions, Pixman provides faster pixel manipulation, making the switch to the Pixman-based implementation advantageous.

Could I refer to part of your sentence as a reference and include it in the commit message?

jserv · 2024-11-04T17:20:03Z

Could I refer to part of your sentence as a reference and include it in the commit message?

Sure! It is collaboration.

jserv

Don't say "According to issue #6." This pull request was linked to that already.

Don't say "I replace original image compositing with Pixman function." Always write down commit message in third-person point of view.

jserv

Show (partial) benchmark results in git commit messages.

Replace original image compositing with Pixman function. Briefly, include:
- pixman_image_composite: image compositing
- pixman_image_fill_rectangles: fill the image with rectangles

Originally, the built-in renderer offers simplicity and works well for RISC-V platforms without vector or SIMD extensions. However, modern CPU architectures have rich ISA extensions, and Pixman provides faster pixel manipulation. Hence, we support Pixman-based implementation to make it advantageous. We just switch to Pixman based rendering in Mado system configuration.

The benchmark results are as follows:

width, height, iteration, operation | built-in (sec) | pixman (sec) |
---------------------------------------------------------------------
100  , 100   , 20000    , source    | 0.065616       | 0.020530     |
                        , over      | 0.176866       | 0.053927     |
---------------------------------------------------------------------
200  , 200   , 10000    , source    | 0.147732       | 0.051592     |
                        , over      | 0.352873       | 0.132560     |
---------------------------------------------------------------------
1200 , 800   , 200      , source    | 0.058365       | 0.041629     |
                        , over      | 0.458145       | 0.058690     |
---------------------------------------------------------------------

Note: the table shows that Pixman method is better than built-in method for image compositing runtime.

Close sysprog21#6

jserv · 2024-11-05T17:08:03Z

Thank @Bennctu for contributing!

Bennctu force-pushed the pixmap_dev branch from 2615b1d to 6332572 Compare October 1, 2024 19:10

jserv reviewed Oct 2, 2024

View reviewed changes

configs/Kconfig Outdated Show resolved Hide resolved

jserv reviewed Oct 2, 2024

View reviewed changes

apps/main.c Outdated Show resolved Hide resolved

ndsl7109256 reviewed Oct 2, 2024

View reviewed changes

src/widget.c Outdated Show resolved Hide resolved

Bennctu force-pushed the pixmap_dev branch from 6332572 to 9a9b5de Compare October 5, 2024 07:17

jserv reviewed Oct 5, 2024

View reviewed changes

Makefile Outdated Show resolved Hide resolved

jserv reviewed Oct 5, 2024

View reviewed changes

Makefile Outdated Show resolved Hide resolved

jserv reviewed Oct 5, 2024

View reviewed changes

README.md Outdated Show resolved Hide resolved

jserv reviewed Oct 5, 2024

View reviewed changes

src/pixman.c Outdated Show resolved Hide resolved

jserv reviewed Oct 5, 2024

View reviewed changes

README.md Outdated Show resolved Hide resolved

This comment was marked as resolved.

Sign in to view

This comment was marked as outdated.

Sign in to view

Bennctu force-pushed the pixmap_dev branch from 9a9b5de to fb166c3 Compare October 6, 2024 14:01

jserv reviewed Oct 6, 2024

View reviewed changes

Makefile Outdated Show resolved Hide resolved

jserv reviewed Oct 6, 2024

View reviewed changes

Bennctu force-pushed the pixmap_dev branch from fb166c3 to 7ccf36e Compare October 6, 2024 14:59

jserv reviewed Oct 6, 2024

View reviewed changes

Makefile Outdated Show resolved Hide resolved

jserv reviewed Oct 6, 2024

View reviewed changes

jserv mentioned this pull request Oct 6, 2024

Add the Gaussian function to blur a pixmap #58

Open

Bennctu force-pushed the pixmap_dev branch from 7ccf36e to fdbbc9d Compare October 13, 2024 17:35

jserv reviewed Oct 13, 2024

View reviewed changes