Skip to content

Commit

Permalink
restore SkOpts::blit_row_color32
Browse files Browse the repository at this point in the history
We think there are some seemingly minor codegen improvements (pmovzxbw
instead of punpcklbw) when compiling for SSE 4.1 that might actually be
a bigger deal speed-wise than they'd seem.

Also rewrite using SkVx in a way that should scale well up to AVX2.

Change-Id: Ie7c0194dc4fe9fe81c1c932187c0bb00da69190b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/207260
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Lee Salzman <lsalzman@mozilla.com>
  • Loading branch information
Mike Klein authored and Skia Commit-Bot committed Apr 11, 2019
1 parent 25e371f commit c33e6dc
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 13 deletions.
13 changes: 1 addition & 12 deletions src/core/SkBlitRow_D32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
* found in the LICENSE file.
*/

#include "Sk4px.h"
#include "SkBlitRow.h"
#include "SkColorData.h"
#include "SkOpts.h"
Expand Down Expand Up @@ -311,15 +310,5 @@ void SkBlitRow::Color32(SkPMColor dst[], const SkPMColor src[], int count, SkPMC
case 0: memmove(dst, src, count * sizeof(SkPMColor)); return;
case 255: sk_memset32(dst, color, count); return;
}

unsigned invA = 255 - SkGetPackedA32(color);
invA += invA >> 7;
SkASSERT(invA < 256); // We should have already handled alpha == 0 externally.

Sk16h colorHighAndRound = (Sk4px::DupPMColor(color).widen() << 8) + Sk16h(128);
Sk16b invA_16x(invA);

Sk4px::MapSrc(count, dst, src, [&](const Sk4px& src4) -> Sk4px {
return (src4 * invA_16x).addNarrowHi(colorHighAndRound);
});
return SkOpts::blit_row_color32(dst, src, count, color);
}
1 change: 1 addition & 0 deletions src/core/SkOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace SkOpts {

DEFINE_DEFAULT(blit_mask_d32_a8);

DEFINE_DEFAULT(blit_row_color32);
DEFINE_DEFAULT(blit_row_s32a_opaque);

DEFINE_DEFAULT(RGBA_to_BGRA);
Expand Down
1 change: 1 addition & 0 deletions src/core/SkOpts.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace SkOpts {
extern SkXfermode* (*create_xfermode)(SkBlendMode);

extern void (*blit_mask_d32_a8)(SkPMColor*, size_t, const SkAlpha*, size_t, SkColor, int, int);
// Blend a constant color over count src pixels, writing into dst.
// Arguments are (dst, src, count, color). Init_hsw() and Init_sse41() repoint this at their own builds.
extern void (*blit_row_color32)(SkPMColor*, const SkPMColor*, int, SkPMColor);
extern void (*blit_row_s32a_opaque)(SkPMColor*, const SkPMColor*, int, U8CPU);

// Swizzle input into some sort of 8888 pixel, {premul,unpremul} x {rgba,bgra}.
Expand Down
33 changes: 33 additions & 0 deletions src/opts/SkBlitRow_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#ifndef SkBlitRow_opts_DEFINED
#define SkBlitRow_opts_DEFINED

#include "SkVx.h"
#include "SkColorData.h"
#include "SkMSAN.h"

Expand Down Expand Up @@ -40,6 +41,38 @@

namespace SK_OPTS_NS {

// Composite one constant color over `count` pixels read from src, storing the results in dst.
//   dst   - destination pixels (written)
//   src   - source pixels (read)
//   count - number of pixels to process
//   color - the color blended over every src pixel
inline void blit_row_color32(SkPMColor* dst, const SkPMColor* src, int count, SkPMColor color) {
    // Pixels processed per vector step; 4 or 16 would also be reasonable choices.
    constexpr int N = 8;
    using U32 = skvx::Vec<  N, uint32_t>;   // N whole pixels
    using U16 = skvx::Vec<4*N, uint16_t>;   // the same pixels, one 16-bit lane per byte
    using U8  = skvx::Vec<4*N, uint8_t>;    // the same pixels, one 8-bit lane per byte

    // Blends `color` over every pixel held in `pixels` and returns the blended pixels.
    auto blend = [color](U32 pixels) {
        // Weight applied to src: (255 - alpha), then adjusted by adding its own value >> 7.
        const unsigned inverse = 255 - SkGetPackedA32(color);
        const unsigned invA    = inverse + (inverse >> 7);
        // alpha == 0 and alpha == 255 are handled specially, so invA stays strictly inside (0, 256).
        SkASSERT(0 < invA && invA < 256);

        // Per byte: (src * invA + (color << 8) + 128) >> 8
        // Every intermediate should fit in 16 bits.
        // TODO(mtklein): can we do src * invA with umull on ARM?
        U16 srcWide   = skvx::cast<uint16_t>(skvx::bit_pun<U8>(pixels));
        U16 colorWide = skvx::cast<uint16_t>(skvx::bit_pun<U8>(U32(color)));
        U16 blended   = (srcWide * invA + (colorWide << 8) + 128)>>8;

        // Narrow back to bytes and reinterpret as packed pixels.
        return skvx::bit_pun<U32>(skvx::cast<uint8_t>(blended));
    };

    // Main loop: N pixels at a time while at least N remain.
    for (; count >= N; count -= N, src += N, dst += N) {
        blend(U32::Load(src)).store(dst);
    }

    // Tail: remaining pixels one at a time; only lane 0 of the result is kept.
    for (; count > 0; --count) {
        *dst++ = blend(U32{*src++})[0];
    }
}

#if defined(SK_ARM_HAS_NEON)

// Return a uint8x8_t value, r, computed as r[i] = SkMulDiv255Round(x[i], y[i]), where r[i], x[i],
Expand Down
4 changes: 4 additions & 0 deletions src/opts/SkOpts_hsw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@
#include "SkOpts.h"

#define SK_OPTS_NS hsw
#include "SkBlitRow_opts.h"
#include "SkRasterPipeline_opts.h"
#include "SkUtils_opts.h"

namespace SkOpts {
void Init_hsw() {
blit_row_color32 = hsw::blit_row_color32;
blit_row_s32a_opaque = hsw::blit_row_s32a_opaque;

#define M(st) stages_highp[SkRasterPipeline::st] = (StageFn)SK_OPTS_NS::st;
SK_RASTER_PIPELINE_STAGES(M)
just_return_highp = (StageFn)SK_OPTS_NS::just_return;
Expand Down
3 changes: 2 additions & 1 deletion src/opts/SkOpts_sse41.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
#include "SkOpts.h"

#define SK_OPTS_NS sse41
#include "SkRasterPipeline_opts.h"
#include "SkBlitRow_opts.h"
#include "SkRasterPipeline_opts.h"

namespace SkOpts {
void Init_sse41() {
blit_row_color32 = sse41::blit_row_color32;
blit_row_s32a_opaque = sse41::blit_row_s32a_opaque;

#define M(st) stages_highp[SkRasterPipeline::st] = (StageFn)SK_OPTS_NS::st;
Expand Down

0 comments on commit c33e6dc

Please sign in to comment.