Skip to content

Commit ba64cd5

Browse files
[33.0.0] Backport some fixes from main (#10743)
* Skip a test with threads on ASAN (#10728)

  It's expected that this has memory leaks; there's no clean shutdown in the CLI right now.

* Fix missing libcalls with simd float rounding (#10699)

  This commit fixes some more fallout found on oss-fuzz about the x64 backend generating rounding builtins when it shouldn't be. This situation is caused by simd float rounding instructions, which the x64 backend lowers to a libcall per element; that logic now needs to move to the frontend instead.

* Fix another libcall popping up with simd (#10735)

  This commit is similar to #10699: another instance of a libcall popping up late in the x64 backend. Fuzzing found this issue, and to help verify this is the last one I've run the whole `*.wast` test suite with the x86_64 baseline (no target features); I saw the panic before this PR and no more panics after.

* Inline assembler-x64 `generated_files` in `main.rs` (#10739)

  The public function `generated_files` in `cranelift-assembler-x64` makes the generated `rlib` non-deterministic because it contains the full paths of generated files. But this function is only used in `main.rs` of the same crate, so this change inlines it there to keep the library artifact deterministic while maintaining the same behavior.

---------

Co-authored-by: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
1 parent fd788dc commit ba64cd5

File tree

6 files changed

+488
-103
lines changed

6 files changed

+488
-103
lines changed

cranelift/assembler-x64/src/lib.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,3 @@ pub use mem::{
8686
};
8787
pub use rex::RexFlags;
8888
pub use xmm::Xmm;
89-
90-
/// List the files generated to create this assembler.
91-
pub fn generated_files() -> Vec<std::path::PathBuf> {
92-
include!(concat!(env!("OUT_DIR"), "/generated-files.rs"))
93-
}
cranelift/assembler-x64/src/main.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
//! Print the path to the generated code.
22
33
fn main() {
4-
for path in cranelift_assembler_x64::generated_files() {
4+
let paths: Vec<std::path::PathBuf> = include!(concat!(env!("OUT_DIR"), "/generated-files.rs"));
5+
for path in paths {
56
println!("{}", path.display());
67
}
78
}

crates/cranelift/src/func_environ.rs

Lines changed: 165 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ use crate::translate::{
88
use crate::{BuiltinFunctionSignatures, TRAP_INTERNAL_ASSERT};
99
use cranelift_codegen::cursor::FuncCursor;
1010
use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
11-
use cranelift_codegen::ir::immediates::{Imm64, Offset32};
11+
use cranelift_codegen::ir::immediates::{Imm64, Offset32, V128Imm};
1212
use cranelift_codegen::ir::pcc::Fact;
1313
use cranelift_codegen::ir::types::*;
1414
use cranelift_codegen::ir::{self, types};
1515
use cranelift_codegen::ir::{ArgumentPurpose, ConstantData, Function, InstBuilder, MemFlags};
1616
use cranelift_codegen::isa::{TargetFrontendConfig, TargetIsa};
1717
use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap};
18-
use cranelift_frontend::FunctionBuilder;
1918
use cranelift_frontend::Variable;
19+
use cranelift_frontend::{FuncInstBuilder, FunctionBuilder};
2020
use smallvec::SmallVec;
2121
use std::mem;
2222
use wasmparser::{Operator, WasmFeatures};
@@ -3319,103 +3319,193 @@ impl FuncEnvironment<'_> {
33193319
let _ = (builder, num_pages, mem_index);
33203320
}
33213321

3322-
pub fn ceil_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3323-
// If the ISA has rounding instructions, let Cranelift use them. But if
3324-
// not, lower to a libcall here, rather than having Cranelift do it. We
3325-
// can pass our libcall the vmctx pointer, which we use for stack
3326-
// overflow checking.
3322+
/// If the ISA has rounding instructions, let Cranelift use them. But if
3323+
/// not, lower to a libcall here, rather than having Cranelift do it. We
3324+
/// can pass our libcall the vmctx pointer, which we use for stack
3325+
/// overflow checking.
3326+
///
3327+
/// This helper is generic for all rounding instructions below, both for
3328+
/// scalar and simd types. The `clif_round` argument is the CLIF-level
3329+
/// rounding instruction to use if the ISA has the instruction, and the
3330+
/// `round_builtin` helper is used to determine which element-level
3331+
/// rounding operation builtin is used. Note that this handles the case
3332+
/// when `value` is a vector by doing an element-wise libcall invocation.
3333+
fn isa_round(
3334+
&mut self,
3335+
builder: &mut FunctionBuilder,
3336+
value: ir::Value,
3337+
clif_round: fn(FuncInstBuilder<'_, '_>, ir::Value) -> ir::Value,
3338+
round_builtin: fn(&mut BuiltinFunctions, &mut Function) -> ir::FuncRef,
3339+
) -> ir::Value {
33273340
if self.isa.has_round() {
3328-
builder.ins().ceil(value)
3329-
} else {
3330-
let ceil = self.builtin_functions.ceil_f32(builder.func);
3331-
let vmctx = self.vmctx_val(&mut builder.cursor());
3332-
let call = builder.ins().call(ceil, &[vmctx, value]);
3341+
return clif_round(builder.ins(), value);
3342+
}
3343+
3344+
let vmctx = self.vmctx_val(&mut builder.cursor());
3345+
let round = round_builtin(&mut self.builtin_functions, builder.func);
3346+
let round_one = |builder: &mut FunctionBuilder, value: ir::Value| {
3347+
let call = builder.ins().call(round, &[vmctx, value]);
33333348
*builder.func.dfg.inst_results(call).first().unwrap()
3349+
};
3350+
3351+
let ty = builder.func.dfg.value_type(value);
3352+
if !ty.is_vector() {
3353+
return round_one(builder, value);
3354+
}
3355+
3356+
assert_eq!(ty.bits(), 128);
3357+
let zero = builder.func.dfg.constants.insert(V128Imm([0; 16]).into());
3358+
let mut result = builder.ins().vconst(ty, zero);
3359+
for i in 0..u8::try_from(ty.lane_count()).unwrap() {
3360+
let element = builder.ins().extractlane(value, i);
3361+
let element_rounded = round_one(builder, element);
3362+
result = builder.ins().insertlane(result, element_rounded, i);
33343363
}
3364+
result
3365+
}
3366+
3367+
pub fn ceil_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3368+
self.isa_round(
3369+
builder,
3370+
value,
3371+
|ins, val| ins.ceil(val),
3372+
BuiltinFunctions::ceil_f32,
3373+
)
33353374
}
33363375

33373376
pub fn ceil_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3338-
// See the comments in `ceil_f32` about libcalls.
3339-
if self.isa.has_round() {
3340-
builder.ins().ceil(value)
3341-
} else {
3342-
let ceil = self.builtin_functions.ceil_f64(builder.func);
3343-
let vmctx = self.vmctx_val(&mut builder.cursor());
3344-
let call = builder.ins().call(ceil, &[vmctx, value]);
3345-
*builder.func.dfg.inst_results(call).first().unwrap()
3346-
}
3377+
self.isa_round(
3378+
builder,
3379+
value,
3380+
|ins, val| ins.ceil(val),
3381+
BuiltinFunctions::ceil_f64,
3382+
)
3383+
}
3384+
3385+
pub fn ceil_f32x4(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3386+
self.isa_round(
3387+
builder,
3388+
value,
3389+
|ins, val| ins.ceil(val),
3390+
BuiltinFunctions::ceil_f32,
3391+
)
3392+
}
3393+
3394+
pub fn ceil_f64x2(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3395+
self.isa_round(
3396+
builder,
3397+
value,
3398+
|ins, val| ins.ceil(val),
3399+
BuiltinFunctions::ceil_f64,
3400+
)
33473401
}
33483402

33493403
pub fn floor_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3350-
// See the comments in `ceil_f32` about libcalls.
3351-
if self.isa.has_round() {
3352-
builder.ins().floor(value)
3353-
} else {
3354-
let floor = self.builtin_functions.floor_f32(builder.func);
3355-
let vmctx = self.vmctx_val(&mut builder.cursor());
3356-
let call = builder.ins().call(floor, &[vmctx, value]);
3357-
*builder.func.dfg.inst_results(call).first().unwrap()
3358-
}
3404+
self.isa_round(
3405+
builder,
3406+
value,
3407+
|ins, val| ins.floor(val),
3408+
BuiltinFunctions::floor_f32,
3409+
)
33593410
}
33603411

33613412
pub fn floor_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3362-
// See the comments in `ceil_f32` about libcalls.
3363-
if self.isa.has_round() {
3364-
builder.ins().floor(value)
3365-
} else {
3366-
let floor = self.builtin_functions.floor_f64(builder.func);
3367-
let vmctx = self.vmctx_val(&mut builder.cursor());
3368-
let call = builder.ins().call(floor, &[vmctx, value]);
3369-
*builder.func.dfg.inst_results(call).first().unwrap()
3370-
}
3413+
self.isa_round(
3414+
builder,
3415+
value,
3416+
|ins, val| ins.floor(val),
3417+
BuiltinFunctions::floor_f64,
3418+
)
3419+
}
3420+
3421+
pub fn floor_f32x4(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3422+
self.isa_round(
3423+
builder,
3424+
value,
3425+
|ins, val| ins.floor(val),
3426+
BuiltinFunctions::floor_f32,
3427+
)
3428+
}
3429+
3430+
pub fn floor_f64x2(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3431+
self.isa_round(
3432+
builder,
3433+
value,
3434+
|ins, val| ins.floor(val),
3435+
BuiltinFunctions::floor_f64,
3436+
)
33713437
}
33723438

33733439
pub fn trunc_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3374-
// See the comments in `ceil_f32` about libcalls.
3375-
if self.isa.has_round() {
3376-
builder.ins().trunc(value)
3377-
} else {
3378-
let trunc = self.builtin_functions.trunc_f32(builder.func);
3379-
let vmctx = self.vmctx_val(&mut builder.cursor());
3380-
let call = builder.ins().call(trunc, &[vmctx, value]);
3381-
*builder.func.dfg.inst_results(call).first().unwrap()
3382-
}
3440+
self.isa_round(
3441+
builder,
3442+
value,
3443+
|ins, val| ins.trunc(val),
3444+
BuiltinFunctions::trunc_f32,
3445+
)
33833446
}
33843447

33853448
pub fn trunc_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3386-
// See the comments in `ceil_f32` about libcalls.
3387-
if self.isa.has_round() {
3388-
builder.ins().trunc(value)
3389-
} else {
3390-
let trunc = self.builtin_functions.trunc_f64(builder.func);
3391-
let vmctx = self.vmctx_val(&mut builder.cursor());
3392-
let call = builder.ins().call(trunc, &[vmctx, value]);
3393-
*builder.func.dfg.inst_results(call).first().unwrap()
3394-
}
3449+
self.isa_round(
3450+
builder,
3451+
value,
3452+
|ins, val| ins.trunc(val),
3453+
BuiltinFunctions::trunc_f64,
3454+
)
3455+
}
3456+
3457+
pub fn trunc_f32x4(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3458+
self.isa_round(
3459+
builder,
3460+
value,
3461+
|ins, val| ins.trunc(val),
3462+
BuiltinFunctions::trunc_f32,
3463+
)
3464+
}
3465+
3466+
pub fn trunc_f64x2(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3467+
self.isa_round(
3468+
builder,
3469+
value,
3470+
|ins, val| ins.trunc(val),
3471+
BuiltinFunctions::trunc_f64,
3472+
)
33953473
}
33963474

33973475
pub fn nearest_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3398-
// See the comments in `ceil_f32` about libcalls.
3399-
if self.isa.has_round() {
3400-
builder.ins().nearest(value)
3401-
} else {
3402-
let nearest = self.builtin_functions.nearest_f32(builder.func);
3403-
let vmctx = self.vmctx_val(&mut builder.cursor());
3404-
let call = builder.ins().call(nearest, &[vmctx, value]);
3405-
*builder.func.dfg.inst_results(call).first().unwrap()
3406-
}
3476+
self.isa_round(
3477+
builder,
3478+
value,
3479+
|ins, val| ins.nearest(val),
3480+
BuiltinFunctions::nearest_f32,
3481+
)
34073482
}
34083483

34093484
pub fn nearest_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3410-
// See the comments in `ceil_f32` about libcalls.
3411-
if self.isa.has_round() {
3412-
builder.ins().nearest(value)
3413-
} else {
3414-
let nearest = self.builtin_functions.nearest_f64(builder.func);
3415-
let vmctx = self.vmctx_val(&mut builder.cursor());
3416-
let call = builder.ins().call(nearest, &[vmctx, value]);
3417-
*builder.func.dfg.inst_results(call).first().unwrap()
3418-
}
3485+
self.isa_round(
3486+
builder,
3487+
value,
3488+
|ins, val| ins.nearest(val),
3489+
BuiltinFunctions::nearest_f64,
3490+
)
3491+
}
3492+
3493+
pub fn nearest_f32x4(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3494+
self.isa_round(
3495+
builder,
3496+
value,
3497+
|ins, val| ins.nearest(val),
3498+
BuiltinFunctions::nearest_f32,
3499+
)
3500+
}
3501+
3502+
pub fn nearest_f64x2(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
3503+
self.isa_round(
3504+
builder,
3505+
value,
3506+
|ins, val| ins.nearest(val),
3507+
BuiltinFunctions::nearest_f64,
3508+
)
34193509
}
34203510

34213511
pub fn swizzle(

crates/cranelift/src/translate/code_translator.rs

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2041,11 +2041,26 @@ pub fn translate_operator(
20412041
}
20422042
Operator::I32x4RelaxedTruncF64x2UZero | Operator::I32x4TruncSatF64x2UZero => {
20432043
let a = pop1_with_bitcast(state, F64X2, builder);
2044-
let converted_a = builder.ins().fcvt_to_uint_sat(I64X2, a);
2045-
let handle = builder.func.dfg.constants.insert(vec![0u8; 16].into());
2046-
let zero = builder.ins().vconst(I64X2, handle);
2047-
2048-
state.push1(builder.ins().uunarrow(converted_a, zero));
2044+
let zero_constant = builder.func.dfg.constants.insert(vec![0u8; 16].into());
2045+
let result = if environ.is_x86() && !environ.isa().has_round() {
2046+
// On x86 the vector lowering for `fcvt_to_uint_sat` requires
2047+
// SSE4.1 `round` instructions. If SSE4.1 isn't available it
2048+
// falls back to a libcall which we don't want in Wasmtime.
2049+
// Handle this by falling back to the scalar implementation
2050+
// which does not require SSE4.1 instructions.
2051+
let lane0 = builder.ins().extractlane(a, 0);
2052+
let lane1 = builder.ins().extractlane(a, 1);
2053+
let lane0_rounded = builder.ins().fcvt_to_uint_sat(I32, lane0);
2054+
let lane1_rounded = builder.ins().fcvt_to_uint_sat(I32, lane1);
2055+
let result = builder.ins().vconst(I32X4, zero_constant);
2056+
let result = builder.ins().insertlane(result, lane0_rounded, 0);
2057+
builder.ins().insertlane(result, lane1_rounded, 1)
2058+
} else {
2059+
let converted_a = builder.ins().fcvt_to_uint_sat(I64X2, a);
2060+
let zero = builder.ins().vconst(I64X2, zero_constant);
2061+
builder.ins().uunarrow(converted_a, zero)
2062+
};
2063+
state.push1(result);
20492064
}
20502065

20512066
Operator::I8x16NarrowI16x8S => {
@@ -2136,24 +2151,37 @@ pub fn translate_operator(
21362151
let widen_high = builder.ins().uwiden_high(a);
21372152
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
21382153
}
2139-
Operator::F32x4Ceil | Operator::F64x2Ceil => {
2140-
// This is something of a misuse of `type_of`, because that produces the return type
2141-
// of `op`. In this case we want the arg type, but we know it's the same as the
2142-
// return type. Same for the 3 cases below.
2143-
let arg = pop1_with_bitcast(state, type_of(op), builder);
2144-
state.push1(builder.ins().ceil(arg));
2154+
Operator::F32x4Ceil => {
2155+
let arg = pop1_with_bitcast(state, F32X4, builder);
2156+
state.push1(environ.ceil_f32x4(builder, arg));
21452157
}
2146-
Operator::F32x4Floor | Operator::F64x2Floor => {
2147-
let arg = pop1_with_bitcast(state, type_of(op), builder);
2148-
state.push1(builder.ins().floor(arg));
2158+
Operator::F64x2Ceil => {
2159+
let arg = pop1_with_bitcast(state, F64X2, builder);
2160+
state.push1(environ.ceil_f64x2(builder, arg));
21492161
}
2150-
Operator::F32x4Trunc | Operator::F64x2Trunc => {
2151-
let arg = pop1_with_bitcast(state, type_of(op), builder);
2152-
state.push1(builder.ins().trunc(arg));
2162+
Operator::F32x4Floor => {
2163+
let arg = pop1_with_bitcast(state, F32X4, builder);
2164+
state.push1(environ.floor_f32x4(builder, arg));
21532165
}
2154-
Operator::F32x4Nearest | Operator::F64x2Nearest => {
2155-
let arg = pop1_with_bitcast(state, type_of(op), builder);
2156-
state.push1(builder.ins().nearest(arg));
2166+
Operator::F64x2Floor => {
2167+
let arg = pop1_with_bitcast(state, F64X2, builder);
2168+
state.push1(environ.floor_f64x2(builder, arg));
2169+
}
2170+
Operator::F32x4Trunc => {
2171+
let arg = pop1_with_bitcast(state, F32X4, builder);
2172+
state.push1(environ.trunc_f32x4(builder, arg));
2173+
}
2174+
Operator::F64x2Trunc => {
2175+
let arg = pop1_with_bitcast(state, F64X2, builder);
2176+
state.push1(environ.trunc_f64x2(builder, arg));
2177+
}
2178+
Operator::F32x4Nearest => {
2179+
let arg = pop1_with_bitcast(state, F32X4, builder);
2180+
state.push1(environ.nearest_f32x4(builder, arg));
2181+
}
2182+
Operator::F64x2Nearest => {
2183+
let arg = pop1_with_bitcast(state, F64X2, builder);
2184+
state.push1(environ.nearest_f64x2(builder, arg));
21572185
}
21582186
Operator::I32x4DotI16x8S => {
21592187
let (a, b) = pop2_with_bitcast(state, I16X8, builder);

0 commit comments

Comments
 (0)