Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x64: Fix ISA requirement for pmaddubsw #6629

Merged
merged 3 commits into from
Jun 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/aarch64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ impl TargetIsa for AArch64Backend {
fn has_x86_pmulhrsw_lowering(&self) -> bool {
false
}

fn has_x86_pmaddubsw_lowering(&self) -> bool {
false
}
}

impl fmt::Display for AArch64Backend {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,10 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
/// Returns whether the CLIF `x86_pmulhrsw` instruction is implemented for
/// this ISA.
fn has_x86_pmulhrsw_lowering(&self) -> bool;

/// Returns whether the CLIF `x86_pmaddubsw` instruction is implemented for
/// this ISA.
fn has_x86_pmaddubsw_lowering(&self) -> bool;
}

/// Function alignment specifications as required by an ISA, returned by
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ impl TargetIsa for Riscv64Backend {
fn has_x86_pmulhrsw_lowering(&self) -> bool {
false
}

fn has_x86_pmaddubsw_lowering(&self) -> bool {
false
}
}

impl fmt::Display for Riscv64Backend {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/s390x/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ impl TargetIsa for S390xBackend {
fn has_x86_pmulhrsw_lowering(&self) -> bool {
false
}

fn has_x86_pmaddubsw_lowering(&self) -> bool {
false
}
}

impl fmt::Display for S390xBackend {
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,6 @@ impl SseOpcode {
| SseOpcode::Pcmpgtd
| SseOpcode::Pextrw
| SseOpcode::Pinsrw
| SseOpcode::Pmaddubsw
| SseOpcode::Pmaddwd
| SseOpcode::Pmaxsw
| SseOpcode::Pmaxub
Expand Down Expand Up @@ -1303,6 +1302,7 @@ impl SseOpcode {
| SseOpcode::Pshufb
| SseOpcode::Phaddw
| SseOpcode::Phaddd
| SseOpcode::Pmaddubsw
| SseOpcode::Movddup => SSSE3,

SseOpcode::Blendvpd
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2413,6 +2413,7 @@
;; Rules for `x86_pmaddubsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I16X8 (x86_pmaddubsw x y)))
(if-let $true (use_ssse3))
(x64_pmaddubsw y x))

;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down Expand Up @@ -3575,6 +3576,7 @@
(has_type $I16X8 (iadd_pairwise
(swiden_low val @ (value_type $I8X16))
(swiden_high val))))
(if-let $true (use_ssse3))
(let ((mul_const Xmm (x64_xmm_load_const $I8X16
(emit_u128_le_const 0x01010101010101010101010101010101))))
(x64_pmaddubsw mul_const val)))
Expand All @@ -3592,6 +3594,7 @@
(has_type $I16X8 (iadd_pairwise
(uwiden_low val @ (value_type $I8X16))
(uwiden_high val))))
(if-let $true (use_ssse3))
(let ((mul_const XmmMem (emit_u128_le_const 0x01010101010101010101010101010101)))
(x64_pmaddubsw val mul_const)))

Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/x64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ impl TargetIsa for X64Backend {
fn has_x86_pmulhrsw_lowering(&self) -> bool {
self.x64_flags.use_ssse3()
}

fn has_x86_pmaddubsw_lowering(&self) -> bool {
self.x64_flags.use_ssse3()
}
}

impl fmt::Display for X64Backend {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
test compile precise-output
target x86_64
target x86_64 ssse3

function %fn1(i8x16) -> i16x8 {
block0(v0: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/src/test_wasm/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,10 @@ impl<'a> FuncEnvironment for FuncEnv<'a> {
self.config.target.contains("x86_64")
}

fn use_x86_pmaddubsw_for_dot(&self) -> bool {
self.config.target.contains("x86_64")
}

fn translate_call_ref(
&mut self,
_builder: &mut cranelift_frontend::FunctionBuilder<'_>,
Expand Down
29 changes: 15 additions & 14 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2302,7 +2302,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I16x8RelaxedDotI8x16I7x16S => {
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
state.push1(
if environ.relaxed_simd_deterministic() || !environ.is_x86() {
if environ.relaxed_simd_deterministic() || !environ.use_x86_pmaddubsw_for_dot() {
// Deterministic semantics are to treat both operands as
// signed integers and perform the dot product.
let alo = builder.ins().swiden_low(a);
Expand All @@ -2321,19 +2321,20 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I32x4RelaxedDotI8x16I7x16AddS => {
let c = pop1_with_bitcast(state, I32X4, builder);
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
let dot = if environ.relaxed_simd_deterministic() || !environ.is_x86() {
// Deterministic semantics are to treat both operands as
// signed integers and perform the dot product.
let alo = builder.ins().swiden_low(a);
let blo = builder.ins().swiden_low(b);
let lo = builder.ins().imul(alo, blo);
let ahi = builder.ins().swiden_high(a);
let bhi = builder.ins().swiden_high(b);
let hi = builder.ins().imul(ahi, bhi);
builder.ins().iadd_pairwise(lo, hi)
} else {
builder.ins().x86_pmaddubsw(a, b)
};
let dot =
if environ.relaxed_simd_deterministic() || !environ.use_x86_pmaddubsw_for_dot() {
// Deterministic semantics are to treat both operands as
// signed integers and perform the dot product.
let alo = builder.ins().swiden_low(a);
let blo = builder.ins().swiden_low(b);
let lo = builder.ins().imul(alo, blo);
let ahi = builder.ins().swiden_high(a);
let bhi = builder.ins().swiden_high(b);
let hi = builder.ins().imul(ahi, bhi);
builder.ins().iadd_pairwise(lo, hi)
} else {
builder.ins().x86_pmaddubsw(a, b)
};
let dotlo = builder.ins().swiden_low(dot);
let dothi = builder.ins().swiden_high(dot);
let dot32 = builder.ins().iadd_pairwise(dotlo, dothi);
Expand Down
6 changes: 6 additions & 0 deletions cranelift/wasm/src/environ/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,12 @@ pub trait FuncEnvironment: TargetEnvironment {
fn use_x86_pmulhrsw_for_relaxed_q15mul(&self) -> bool {
false
}

/// Returns whether the CLIF `x86_pmaddubsw` instruction should be used for
/// the relaxed-simd dot-product instructions instruction.
fn use_x86_pmaddubsw_for_dot(&self) -> bool {
false
}
}

/// An object satisfying the `ModuleEnvironment` trait can be passed as argument to the
Expand Down
4 changes: 4 additions & 0 deletions crates/cranelift/src/func_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2211,4 +2211,8 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
fn use_x86_pmulhrsw_for_relaxed_q15mul(&self) -> bool {
self.isa.has_x86_pmulhrsw_lowering()
}

fn use_x86_pmaddubsw_for_dot(&self) -> bool {
self.isa.has_x86_pmaddubsw_lowering()
}
}