Release 1.0.7

* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light, eff_hsla_alpha for AArch64 ASIMD.
lsp-plugins · Sep 11, 2022 · 2afc956 · 2afc956
2 parents a364278 + 9b29544
commit 2afc956
Show file tree

Hide file tree

Showing 31 changed files with 6,359 additions and 3,652 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -2,6 +2,12 @@
 * RECENT CHANGES
 *******************************************************************************
 
+=== 1.0.7 ===
+* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
+* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
+* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
+* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light, eff_hsla_alpha for AArch64 ASIMD.
+
 === 1.0.6 ===
 * Updated build scripts.
 

diff --git a/include/lsp-plug.in/dsp/version.h b/include/lsp-plug.in/dsp/version.h
@@ -25,7 +25,7 @@
 // Define version of headers
 #define LSP_DSP_LIB_MAJOR 1
 #define LSP_DSP_LIB_MINOR 0
-#define LSP_DSP_LIB_MICRO 6
+#define LSP_DSP_LIB_MICRO 7 // third version component, raised from 6 to 7 so the headers report release 1.0.7
 
 #if defined(__WINDOWS__) || defined(__WIN32__) || defined(__WIN64__) || defined(_WIN64) || defined(_WIN32) || defined(__WINNT) || defined(__WINNT__)
  #define LSP_DSP_LIB_EXPORT_MODIFIER __declspec(dllexport)

diff --git a/include/private/dsp/arch/aarch64/asimd/filters/dynamic.h b/include/private/dsp/arch/aarch64/asimd/filters/dynamic.h
@@ -26,6 +26,8 @@
  #error "This header should not be included directly"
 #endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_IMPL */
 
+#include <private/dsp/arch/aarch64/asimd/filters/static.h>
+
 namespace lsp
 {
  namespace asimd

diff --git a/include/private/dsp/arch/aarch64/asimd/graphics.h b/include/private/dsp/arch/aarch64/asimd/graphics.h
diff --git a/include/private/dsp/arch/aarch64/asimd/graphics/axis.h b/include/private/dsp/arch/aarch64/asimd/graphics/axis.h
diff --git a/include/private/dsp/arch/aarch64/asimd/graphics/colors.h b/include/private/dsp/arch/aarch64/asimd/graphics/colors.h
diff --git a/include/private/dsp/arch/aarch64/asimd/graphics/effects.h b/include/private/dsp/arch/aarch64/asimd/graphics/effects.h
diff --git a/include/private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h b/include/private/dsp/arch/aarch64/asimd/graphics/pixelfmt.h
@@ -30,6 +30,126 @@ namespace lsp
 {
  namespace asimd
  {
+ IF_ARCH_AARCH64(
+ static const uint32_t rgba32_to_bgra32_const[] __lsp_aligned16 = // bit-select mask table; rgba32_to_bgra32 loads it with one "ldp q16, q17" (two 128-bit vectors)
+ {
+ LSP_DSP_VEC4(0x00ff00ff), // -> q16: 0x00ff00ff marks bits 0-7 and 16-23 of a 32-bit pixel; LSP_DSP_VEC4 presumably repeats the value for all four lanes - confirm against the macro
+ LSP_DSP_VEC4(0x00ff00ff), // -> q17: same mask again, used for the second register of each load pair
+ };
+ );
+
+ void rgba32_to_bgra32(void *dst, const void *src, size_t count) // Copies count 32-bit pixels from src to dst, exchanging bits 0-7 with bits 16-23 of every pixel (R <-> B); the other two bytes (G, A) keep their place
+ {
+ ARCH_AARCH64_ASM(
+ __ASM_EMIT("ldp q16, q17, [%[XC]]") // v16, v17 = bit-select masks from rgba32_to_bgra32_const (0x00ff00ff per pixel)
+
+ // 32x blocks: main loop, 32 pixels (0x80 bytes) per iteration
+ __ASM_EMIT("subs %[count], %[count], #32") // count -= 32; from here on count is kept biased below the real number of pixels left
+ __ASM_EMIT("b.lo 2f") // unsigned borrow: fewer than 32 pixels, skip the main loop
+ __ASM_EMIT("1:")
+ __ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0..v7 = 32 source pixels, each laid out as R G B A
+ __ASM_EMIT("ldp q2, q3, [%[src], 0x20]")
+ __ASM_EMIT("ldp q4, q5, [%[src], 0x40]")
+ __ASM_EMIT("ldp q6, q7, [%[src], 0x60]")
+ __ASM_EMIT("rev32 v8.8h, v0.8h") // v8..v15 = same pixels with the two 16-bit halves of each 32-bit word swapped: B A R G
+ __ASM_EMIT("rev32 v9.8h, v1.8h")
+ __ASM_EMIT("rev32 v10.8h, v2.8h")
+ __ASM_EMIT("rev32 v11.8h, v3.8h")
+ __ASM_EMIT("rev32 v12.8h, v4.8h")
+ __ASM_EMIT("rev32 v13.8h, v5.8h")
+ __ASM_EMIT("rev32 v14.8h, v6.8h")
+ __ASM_EMIT("rev32 v15.8h, v7.8h")
+ __ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // BIT: where the mask bit is 1 take the bit from v8..v15, elsewhere keep v0..v7 => B G R A
+ __ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
+ __ASM_EMIT("bit v2.16b, v10.16b, v16.16b")
+ __ASM_EMIT("bit v3.16b, v11.16b, v17.16b")
+ __ASM_EMIT("bit v4.16b, v12.16b, v16.16b")
+ __ASM_EMIT("bit v5.16b, v13.16b, v17.16b")
+ __ASM_EMIT("bit v6.16b, v14.16b, v16.16b")
+ __ASM_EMIT("bit v7.16b, v15.16b, v17.16b")
+ __ASM_EMIT("stp q0, q1, [%[dst], 0x00]") // write the 32 converted pixels
+ __ASM_EMIT("stp q2, q3, [%[dst], 0x20]")
+ __ASM_EMIT("stp q4, q5, [%[dst], 0x40]")
+ __ASM_EMIT("stp q6, q7, [%[dst], 0x60]")
+ __ASM_EMIT("subs %[count], %[count], #32") // flags set here survive the two ADDs below (ADD does not touch flags)
+ __ASM_EMIT("add %[src], %[src], 0x80")
+ __ASM_EMIT("add %[dst], %[dst], 0x80")
+ __ASM_EMIT("b.hs 1b") // no borrow: at least 32 more pixels were available, loop again
+
+ // 16x blocks: executed at most once; on entry count = (pixels left) - 32
+ __ASM_EMIT("2:")
+ __ASM_EMIT("adds %[count], %[count], #16") // count = (pixels left) - 16
+ __ASM_EMIT("b.lt 4f") // negative: fewer than 16 pixels left
+ __ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
+ __ASM_EMIT("ldp q2, q3, [%[src], 0x20]")
+ __ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
+ __ASM_EMIT("rev32 v9.8h, v1.8h")
+ __ASM_EMIT("rev32 v10.8h, v2.8h")
+ __ASM_EMIT("rev32 v11.8h, v3.8h")
+ __ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
+ __ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
+ __ASM_EMIT("bit v2.16b, v10.16b, v16.16b")
+ __ASM_EMIT("bit v3.16b, v11.16b, v17.16b")
+ __ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
+ __ASM_EMIT("stp q2, q3, [%[dst], 0x20]")
+ __ASM_EMIT("sub %[count], %[count], #16") // 16 pixels consumed; count is again (pixels left) - 16, same as on the skip path
+ __ASM_EMIT("add %[src], %[src], 0x40")
+ __ASM_EMIT("add %[dst], %[dst], 0x40")
+
+ // 8x blocks: executed at most once; on entry count = (pixels left) - 16
+ __ASM_EMIT("4:")
+ __ASM_EMIT("adds %[count], %[count], #8") // count = (pixels left) - 8
+ __ASM_EMIT("b.lt 6f") // negative: fewer than 8 pixels left
+ __ASM_EMIT("ldp q0, q1, [%[src], 0x00]") // v0 = R G B A
+ __ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
+ __ASM_EMIT("rev32 v9.8h, v1.8h")
+ __ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
+ __ASM_EMIT("bit v1.16b, v9.16b, v17.16b")
+ __ASM_EMIT("stp q0, q1, [%[dst], 0x00]")
+ __ASM_EMIT("sub %[count], %[count], #8") // 8 pixels consumed; count is again (pixels left) - 8
+ __ASM_EMIT("add %[src], %[src], 0x20")
+ __ASM_EMIT("add %[dst], %[dst], 0x20")
+
+ // 4x blocks: executed at most once; on entry count = (pixels left) - 8
+ __ASM_EMIT("6:")
+ __ASM_EMIT("adds %[count], %[count], #4") // count = (pixels left) - 4
+ __ASM_EMIT("b.lt 8f") // negative: fewer than 4 pixels left
+ __ASM_EMIT("ldr q0, [%[src], 0x00]") // v0 = R G B A
+ __ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
+ __ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
+ __ASM_EMIT("str q0, [%[dst], 0x00]")
+ __ASM_EMIT("sub %[count], %[count], #4") // 4 pixels consumed; count is again (pixels left) - 4
+ __ASM_EMIT("add %[src], %[src], 0x10")
+ __ASM_EMIT("add %[dst], %[dst], 0x10")
+
+ // 1x blocks: loop over the last 0..3 pixels; on entry count = (pixels left) - 4
+ __ASM_EMIT("8:")
+ __ASM_EMIT("adds %[count], %[count], #3") // count = (pixels left) - 1
+ __ASM_EMIT("b.lt 10f") // negative: nothing left
+ __ASM_EMIT("9:")
+ __ASM_EMIT("ld1r {v0.4s}, [%[src]]") // read exactly one 32-bit pixel (replicated to all lanes): R G B A
+ __ASM_EMIT("rev32 v8.8h, v0.8h") // v8 = B A R G
+ __ASM_EMIT("bit v0.16b, v8.16b, v16.16b") // v0 = B G R A
+ __ASM_EMIT("st1 {v0.s}[0], [%[dst]]") // write lane 0 only, i.e. exactly one pixel
+ __ASM_EMIT("add %[src], %[src], 0x04")
+ __ASM_EMIT("add %[dst], %[dst], 0x04")
+ __ASM_EMIT("subs %[count], %[count], #1")
+ __ASM_EMIT("b.ge 9b") // still non-negative: another pixel remains
+
+ // End
+ __ASM_EMIT("10:")
+ : [src] "+r" (src), [dst] "+r" (dst), // read-write operands: the asm advances the local copies of both pointers
+ [count] "+r" (count) // read-write: used as the biased remaining-pixel counter
+ : [XC] "r" (&rgba32_to_bgra32_const[0]) // input: address of the mask table loaded into v16/v17
+ : "cc", "memory", // condition flags are modified; memory is read via src and written via dst
+ "v0", "v1", "v2", "v3", // every vector register touched above is declared clobbered
+ "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11",
+ "v12", "v13", "v14", "v15",
+ "v16", "v17"
+ );
+ }
+
  IF_ARCH_AARCH64(
  static const uint32_t abgr32_to_bgrff32_const[] __lsp_aligned32 =
  {

diff --git a/include/private/dsp/arch/arm/neon-d32/fastconv.h b/include/private/dsp/arch/arm/neon-d32/fastconv.h
@@ -26,6 +26,7 @@
  #error "This header should not be included directly"
 #endif /* PRIVATE_DSP_ARCH_ARM_NEON_D32_IMPL */
 
+#include <private/dsp/arch/arm/neon-d32/fft/const.h>
 #include <private/dsp/arch/arm/neon-d32/fastconv/parse.h>
 #include <private/dsp/arch/arm/neon-d32/fastconv/restore.h>
 #include <private/dsp/arch/arm/neon-d32/fastconv/apply.h>