Skip to content

Commit

Permalink
[x86 & wasm] Split up double saturating-narrows from i32 (halide#7280)
Browse files Browse the repository at this point in the history
* better x86 double sat-cast + add test

* fix wasm too + test

Co-authored-by: Steven Johnson <srj@google.com>
  • Loading branch information
2 people authored and ardier committed Mar 3, 2024
1 parent 9009548 commit 3ed06f7
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 0 deletions.
16 changes: 16 additions & 0 deletions src/CodeGen_WebAssembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
#include "IRMatch.h"
#include "IROperator.h"
#include "LLVM_Headers.h"
#include "Substitute.h"

namespace Halide {
namespace Internal {

using std::pair;
using std::string;
using std::vector;

Expand Down Expand Up @@ -193,6 +195,12 @@ void CodeGen_WebAssembly::visit(const Call *op) {
{"saturating_narrow", i16_sat(wild_i32x_), Target::WasmSimd128},
{"saturating_narrow", u16_sat(wild_i32x_), Target::WasmSimd128},
};
// Rewrite table of {pattern, replacement} pairs: a call that matches
// `first` is code-generated as `second` instead (see the loop over
// cast_rewrites later in this function).
static const vector<pair<Expr, Expr>> cast_rewrites = {
// Some double-narrowing saturating casts can be better expressed as
// combinations of single-narrowing saturating casts.
// Saturating to i16 first does not change the result, because both the
// u8 and the i8 ranges lie entirely inside the i16 range.
{u8_sat(wild_i32x_), u8_sat(i16_sat(wild_i32x_))},
{i8_sat(wild_i32x_), i8_sat(i16_sat(wild_i32x_))},
};
// clang-format on

if (op->type.is_vector()) {
Expand All @@ -208,6 +216,14 @@ void CodeGen_WebAssembly::visit(const Call *op) {
}
}
}

// Try each cast rewrite in table order; the first pattern that matches
// the call wins and ends this visit.
for (const auto &i : cast_rewrites) {
if (expr_match(i.first, op, matches)) {
// Resize the replacement pattern to this call's lane count, then
// substitute the matched operand for the variable named "*"
// (presumably the name carried by the wildcard in the pattern --
// confirm against the definition of wild_i32x_).
Expr replacement = substitute("*", matches[0], with_lanes(i.second, op->type.lanes()));
// Generate code for the rewritten expression instead of the original call.
value = codegen(replacement);
return;
}
}
}

if (op->is_intrinsic(Call::round)) {
Expand Down
16 changes: 16 additions & 0 deletions src/CodeGen_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
#include "IROperator.h"
#include "LLVM_Headers.h"
#include "Simplify.h"
#include "Substitute.h"
#include "Util.h"

namespace Halide {
namespace Internal {

using std::pair;
using std::string;
using std::vector;

Expand Down Expand Up @@ -617,6 +619,20 @@ void CodeGen_X86::visit(const Call *op) {
}
}

// Rewrite table of {pattern, replacement} pairs: a call that matches
// `first` is code-generated as `second` instead.
static const vector<pair<Expr, Expr>> cast_rewrites = {
// Some double-narrowing saturating casts can be better expressed as
// combinations of single-narrowing saturating casts.
// Saturating to i16 first does not change the result, because both the
// u8 and the i8 ranges lie entirely inside the i16 range.
{u8_sat(wild_i32x_), u8_sat(i16_sat(wild_i32x_))},
{i8_sat(wild_i32x_), i8_sat(i16_sat(wild_i32x_))},
};
// Try each cast rewrite in table order; the first pattern that matches
// the call wins and ends this visit.
for (const auto &i : cast_rewrites) {
if (expr_match(i.first, op, matches)) {
// Resize the replacement pattern to this call's lane count, then
// substitute the matched operand for the variable named "*"
// (presumably the name carried by the wildcard in the pattern --
// confirm against the definition of wild_i32x_).
Expr replacement = substitute("*", matches[0], with_lanes(i.second, op->type.lanes()));
// Generate code for the rewritten expression instead of the original call.
value = codegen(replacement);
return;
}
}

// Check for saturating_pmulhrs. On x86, pmulhrs is truncating, but it's still faster
// to use pmulhrs than to lower (producing widening multiplication), and have a check
// for the singular overflow case.
Expand Down
2 changes: 2 additions & 0 deletions test/correctness/simd_op_check_wasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,8 @@ class SimdOpCheckWASM : public SimdOpCheckTest {
check("i8x16.narrow_i16x8_u", 16 * w, u8_sat(i16_1));
check("i16x8.narrow_i32x4_s", 8 * w, i16_sat(i32_1));
check("i16x8.narrow_i32x4_u", 8 * w, u16_sat(i32_1));
// Double-narrowing saturating casts from i32 (to i8 / u8) are expected to
// go through the signed i32 -> i16 narrow first, i.e. the same instruction
// that the i16_sat(i32_1) check above looks for.
// NOTE(review): assumes check() looks for the named instruction in the
// generated code -- confirm against SimdOpCheckTest.
check("i16x8.narrow_i32x4_s", 8 * w, i8_sat(i32_1));
check("i16x8.narrow_i32x4_s", 8 * w, u8_sat(i32_1));

// Integer to integer widening
check("i16x8.extend_low_i8x16_s", 16 * w, i16(i8_1));
Expand Down
2 changes: 2 additions & 0 deletions test/correctness/simd_op_check_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ class SimdOpCheckX86 : public SimdOpCheckTest {
check(std::string("packssdw") + check_suffix, 4 * w, i16_sat(i32_1));
check(std::string("packsswb") + check_suffix, 8 * w, i8_sat(i16_1));
check(std::string("packuswb") + check_suffix, 8 * w, u8_sat(i16_1));
// Double-narrowing saturating casts from i32 (to u8 / i8) are expected to
// go through the signed i32 -> i16 pack first, i.e. the same instruction
// that the i16_sat(i32_1) check above looks for.
// NOTE(review): assumes check() looks for the named instruction in the
// generated code -- confirm against SimdOpCheckTest.
check(std::string("packssdw") + check_suffix, 8 * w, u8_sat(i32_1));
check(std::string("packssdw") + check_suffix, 8 * w, i8_sat(i32_1));

// Sum-of-absolute-difference ops
{
Expand Down

0 comments on commit 3ed06f7

Please sign in to comment.