Skip to content

Commit

Permalink
Release 1.0.7
Browse files Browse the repository at this point in the history
* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light, eff_hsla_alpha for AArch64 ASIMD.
  • Loading branch information
sadko4u committed Sep 11, 2022
2 parents a364278 + 9b29544 commit 2afc956
Show file tree
Hide file tree
Showing 31 changed files with 6,359 additions and 3,652 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
* RECENT CHANGES
*******************************************************************************

=== 1.0.7 ===
* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light, eff_hsla_alpha for AArch64 ASIMD.

=== 1.0.6 ===
* Updated build scripts.

Expand Down
2 changes: 1 addition & 1 deletion include/lsp-plug.in/dsp/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
// Define version of headers
#define LSP_DSP_LIB_MAJOR 1
#define LSP_DSP_LIB_MINOR 0
#define LSP_DSP_LIB_MICRO 6
#define LSP_DSP_LIB_MICRO 7

#if defined(__WINDOWS__) || defined(__WIN32__) || defined(__WIN64__) || defined(_WIN64) || defined(_WIN32) || defined(__WINNT) || defined(__WINNT__)
#define LSP_DSP_LIB_EXPORT_MODIFIER __declspec(dllexport)
Expand Down
2 changes: 2 additions & 0 deletions include/private/dsp/arch/aarch64/asimd/filters/dynamic.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#error "This header should not be included directly"
#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL */

#include <private/dsp/arch/aarch64/asimd/filters/static.h>

namespace lsp
{
namespace asimd
Expand Down
31 changes: 0 additions & 31 deletions include/private/dsp/arch/aarch64/asimd/graphics.h

This file was deleted.

963 changes: 963 additions & 0 deletions include/private/dsp/arch/aarch64/asimd/graphics/axis.h

Large diffs are not rendered by default.

724 changes: 724 additions & 0 deletions include/private/dsp/arch/aarch64/asimd/graphics/colors.h

Large diffs are not rendered by default.

545 changes: 545 additions & 0 deletions include/private/dsp/arch/aarch64/asimd/graphics/effects.h

Large diffs are not rendered by default.

120 changes: 120 additions & 0 deletions include/private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,126 @@ namespace lsp
{
namespace asimd
{
// Bit mask used by rgba32_to_bgra32(): the routine loads 32 bytes from this
// table into q16/q17 and uses them as the select mask of the BIT instruction.
// The value 0x00ff00ff has bits 0-7 and 16-23 set, i.e. it selects the two
// 8-bit fields of a 32-bit pixel that have to be exchanged.
// NOTE(review): LSP_DSP_VEC4(x) presumably expands to four copies of x, so the
// two entries fill exactly two 128-bit registers - confirm against the macro.
IF_ARCH_AARCH64(
static const uint32_t rgba32_to_bgra32_const[] __lsp_aligned16 =
{
LSP_DSP_VEC4(0x00ff00ff),
LSP_DSP_VEC4(0x00ff00ff),
};
);

/**
 * Convert a run of 32-bit pixels by exchanging two of their 8-bit fields.
 *
 * For every 32-bit element the code builds a copy with the two 16-bit halves
 * swapped (REV32 on halfword lanes) and then inserts, under the mask held in
 * v16/v17, bits 0-7 and 16-23 of that copy into the original value. Bits 8-15
 * and 24-31 are left untouched. With the channel layout given in the inline
 * comments this turns R G B A into B G R A.
 *
 * Elements are processed in blocks of 32, 16, 8 and 4, followed by a
 * one-at-a-time tail loop, so any count (including 0) is handled.
 *
 * @param dst destination buffer, receives count 32-bit elements
 * @param src source buffer, count 32-bit elements are read from it
 * @param count number of 32-bit elements (pixels) to convert
 */
void rgba32_to_bgra32(void *dst, const void *src, size_t count)
{
ARCH_AARCH64_ASM(
// Load the 0x00ff00ff select mask into v16 and v17
__ASM_EMIT("ldp q16, q17, [%[XC]]")

// 32x blocks
// count is kept biased by -32 while this loop runs: the unsigned borrow
// from SUBS tells whether a full block of 32 elements is still available
__ASM_EMIT("subs %[count], %[count], #32")
__ASM_EMIT("b.lo 2f")
__ASM_EMIT("1:")
__ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
__ASM_EMIT("ldp q2, q3, [%[src], 0x20]")
__ASM_EMIT("ldp q4, q5, [%[src], 0x40]")
__ASM_EMIT("ldp q6, q7, [%[src], 0x60]")
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
__ASM_EMIT("rev32 v9.8h, v1.8h")
__ASM_EMIT("rev32 v10.8h, v2.8h")
__ASM_EMIT("rev32 v11.8h, v3.8h")
__ASM_EMIT("rev32 v12.8h, v4.8h")
__ASM_EMIT("rev32 v13.8h, v5.8h")
__ASM_EMIT("rev32 v14.8h, v6.8h")
__ASM_EMIT("rev32 v15.8h, v7.8h")
// Take the masked fields from the half-swapped copy, keep the rest as is
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
__ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
__ASM_EMIT("bit v2.16b, v10.16b, v16.16b")
__ASM_EMIT("bit v3.16b, v11.16b, v17.16b")
__ASM_EMIT("bit v4.16b, v12.16b, v16.16b")
__ASM_EMIT("bit v5.16b, v13.16b, v17.16b")
__ASM_EMIT("bit v6.16b, v14.16b, v16.16b")
__ASM_EMIT("bit v7.16b, v15.16b, v17.16b")
__ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
__ASM_EMIT("stp q2, q3, [%[dst], 0x20]")
__ASM_EMIT("stp q4, q5, [%[dst], 0x40]")
__ASM_EMIT("stp q6, q7, [%[dst], 0x60]")
__ASM_EMIT("subs %[count], %[count], #32")
// 0x80 bytes = 32 elements of 4 bytes each
__ASM_EMIT("add %[src], %[src], 0x80")
__ASM_EMIT("add %[dst], %[dst], 0x80")
__ASM_EMIT("b.hs 1b")

// 16x blocks
// Adding 16 changes the bias from -32 to -16: a non-negative result means
// that at least 16 elements remain
__ASM_EMIT("2:")
__ASM_EMIT("adds %[count], %[count], #16")
__ASM_EMIT("b.lt 4f")
__ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
__ASM_EMIT("ldp q2, q3, [%[src], 0x20]")
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
__ASM_EMIT("rev32 v9.8h, v1.8h")
__ASM_EMIT("rev32 v10.8h, v2.8h")
__ASM_EMIT("rev32 v11.8h, v3.8h")
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
__ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
__ASM_EMIT("bit v2.16b, v10.16b, v16.16b")
__ASM_EMIT("bit v3.16b, v11.16b, v17.16b")
__ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
__ASM_EMIT("stp q2, q3, [%[dst], 0x20]")
// Restore the -16 bias so that the next stage sees the same state
// regardless of whether this block was executed or skipped
__ASM_EMIT("sub %[count], %[count], #16")
__ASM_EMIT("add %[src], %[src], 0x40")
__ASM_EMIT("add %[dst], %[dst], 0x40")

// 8x blocks
// Bias goes from -16 to -8
__ASM_EMIT("4:")
__ASM_EMIT("adds %[count], %[count], #8")
__ASM_EMIT("b.lt 6f")
__ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
__ASM_EMIT("rev32 v9.8h, v1.8h")
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
__ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
__ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
__ASM_EMIT("sub %[count], %[count], #8")
__ASM_EMIT("add %[src], %[src], 0x20")
__ASM_EMIT("add %[dst], %[dst], 0x20")

// 4x blocks
// Bias goes from -8 to -4
__ASM_EMIT("6:")
__ASM_EMIT("adds %[count], %[count], #4")
__ASM_EMIT("b.lt 8f")
__ASM_EMIT("ldr q0, [%[src], 0x00]") // v0 = R G B A
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
__ASM_EMIT("str q0, [%[dst], 0x00]")
__ASM_EMIT("sub %[count], %[count], #4")
__ASM_EMIT("add %[src], %[src], 0x10")
__ASM_EMIT("add %[dst], %[dst], 0x10")

// 1x blocks
// Bias goes from -4 to -1, so the loop below runs while count >= 0,
// handling the remaining 0..3 elements one at a time
__ASM_EMIT("8:")
__ASM_EMIT("adds %[count], %[count], #3")
__ASM_EMIT("b.lt 10f")
__ASM_EMIT("9:")
// A single 32-bit element is loaded and replicated to all four lanes;
// only lane 0 is written back
__ASM_EMIT("ld1r {v0.4s}, [%[src]]") // v0 = R G B A
__ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
__ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
__ASM_EMIT("st1 {v0.s}[0], [%[dst]]")
__ASM_EMIT("add %[src], %[src], 0x04")
__ASM_EMIT("add %[dst], %[dst], 0x04")
__ASM_EMIT("subs %[count], %[count], #1")
__ASM_EMIT("b.ge 9b")

// End
__ASM_EMIT("10:")
// src, dst and count are modified by the code above, hence read-write operands
: [src] "+r" (src), [dst] "+r" (dst),
[count] "+r" (count)
// Address of the mask table, read once at the start
: [XC] "r" (&rgba32_to_bgra32_const[0])
// "cc": flags are changed by SUBS/ADDS; "memory": the buffers are accessed
// through plain pointers rather than memory operands; v0-v17 are all the
// vector registers used above
: "cc", "memory",
"v0", "v1", "v2", "v3",
"v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11",
"v12", "v13", "v14", "v15",
"v16", "v17"
);
}

IF_ARCH_AARCH64(
static const uint32_t abgr32_to_bgrff32_const[] __lsp_aligned32 =
{
Expand Down
1 change: 1 addition & 0 deletions include/private/dsp/arch/arm/neon-d32/fastconv.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#error "This header should not be included directly"
#endif /* PRIVATE_DSP_ARCH_ARM_NEON_D32_IMPL */

#include <private/dsp/arch/arm/neon-d32/fft/const.h>
#include <private/dsp/arch/arm/neon-d32/fastconv/parse.h>
#include <private/dsp/arch/arm/neon-d32/fastconv/restore.h>
#include <private/dsp/arch/arm/neon-d32/fastconv/apply.h>
Expand Down
Loading

0 comments on commit 2afc956

Please sign in to comment.