Skip to content

Commit 3089c31

Browse files
committed
Auto merge of #116609 - eduardosm:bump-stdarch, r=workingjubilee
Bump stdarch submodule and remove special handling for LLVM intrinsics that are no longer needed Bumps stdarch to pull rust-lang/stdarch#1477, which reimplemented some functions with portable SIMD intrinsics instead of arch specific LLVM intrinsics. Handling of those LLVM intrinsics is removed from cranelift codegen and miri. cc `@RalfJung` `@bjorn3`
2 parents 615d0f2 + 35e2f4e commit 3089c31

File tree

6 files changed

+8
-262
lines changed

6 files changed

+8
-262
lines changed

compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs

-35
Original file line numberDiff line numberDiff line change
@@ -32,41 +32,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
3232
ret.write_cvalue(fx, CValue::by_val(res, fx.layout_of(fx.tcx.types.i64)));
3333
}
3434

35-
// Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
36-
"llvm.x86.sse2.pmovmskb.128"
37-
| "llvm.x86.avx2.pmovmskb"
38-
| "llvm.x86.sse.movmsk.ps"
39-
| "llvm.x86.sse2.movmsk.pd" => {
40-
intrinsic_args!(fx, args => (a); intrinsic);
41-
42-
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
43-
let lane_ty = fx.clif_type(lane_ty).unwrap();
44-
assert!(lane_count <= 32);
45-
46-
let mut res = fx.bcx.ins().iconst(types::I32, 0);
47-
48-
for lane in (0..lane_count).rev() {
49-
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
50-
51-
// cast float to int
52-
let a_lane = match lane_ty {
53-
types::F32 => codegen_bitcast(fx, types::I32, a_lane),
54-
types::F64 => codegen_bitcast(fx, types::I64, a_lane),
55-
_ => a_lane,
56-
};
57-
58-
// extract sign bit of an int
59-
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1));
60-
61-
// shift sign bit into result
62-
let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false);
63-
res = fx.bcx.ins().ishl_imm(res, 1);
64-
res = fx.bcx.ins().bor(res, a_lane_sign);
65-
}
66-
67-
let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32));
68-
ret.write_cvalue(fx, res);
69-
}
7035
"llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
7136
let (x, y, kind) = match args {
7237
[x, y, kind] => (x, y, kind),

src/tools/miri/src/shims/x86/sse.rs

-19
Original file line numberDiff line numberDiff line change
@@ -209,25 +209,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
209209
)?;
210210
}
211211
}
212-
// Used to implement the _mm_movemask_ps function.
213-
// Returns a scalar integer where the i-th bit is the highest
214-
// bit of the i-th component of `op`.
215-
// https://www.felixcloutier.com/x86/movmskps
216-
"movmsk.ps" => {
217-
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
218-
let (op, op_len) = this.operand_to_simd(op)?;
219-
220-
let mut res = 0;
221-
for i in 0..op_len {
222-
let op = this.read_scalar(&this.project_index(&op, i)?)?;
223-
let op = op.to_u32()?;
224-
225-
// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
226-
res |= (op >> 31) << i;
227-
}
228-
229-
this.write_scalar(Scalar::from_u32(res), dest)?;
230-
}
231212
_ => return Ok(EmulateForeignItemResult::NotSupported),
232213
}
233214
Ok(EmulateForeignItemResult::NeedsJumping)

src/tools/miri/src/shims/x86/sse2.rs

+1-175
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
use rustc_apfloat::{
2-
ieee::{Double, Single},
3-
Float as _,
4-
};
5-
use rustc_middle::mir;
1+
use rustc_apfloat::ieee::Double;
62
use rustc_middle::ty::layout::LayoutOf as _;
73
use rustc_middle::ty::Ty;
84
use rustc_span::Symbol;
@@ -39,49 +35,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
3935
// Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned
4036
// vectors.
4137
match unprefixed_name {
42-
// Used to implement the _mm_avg_epu8 and _mm_avg_epu16 functions.
43-
// Averages packed unsigned 8/16-bit integers in `left` and `right`.
44-
"pavg.b" | "pavg.w" => {
45-
let [left, right] =
46-
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
47-
48-
let (left, left_len) = this.operand_to_simd(left)?;
49-
let (right, right_len) = this.operand_to_simd(right)?;
50-
let (dest, dest_len) = this.place_to_simd(dest)?;
51-
52-
assert_eq!(dest_len, left_len);
53-
assert_eq!(dest_len, right_len);
54-
55-
for i in 0..dest_len {
56-
let left = this.read_immediate(&this.project_index(&left, i)?)?;
57-
let right = this.read_immediate(&this.project_index(&right, i)?)?;
58-
let dest = this.project_index(&dest, i)?;
59-
60-
// Widen the operands to avoid overflow
61-
let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
62-
let left = this.int_to_int_or_float(&left, twice_wide)?;
63-
let right = this.int_to_int_or_float(&right, twice_wide)?;
64-
65-
// Calculate left + right + 1
66-
let added = this.wrapping_binary_op(mir::BinOp::Add, &left, &right)?;
67-
let added = this.wrapping_binary_op(
68-
mir::BinOp::Add,
69-
&added,
70-
&ImmTy::from_uint(1u32, twice_wide),
71-
)?;
72-
73-
// Calculate (left + right + 1) / 2
74-
let divided = this.wrapping_binary_op(
75-
mir::BinOp::Div,
76-
&added,
77-
&ImmTy::from_uint(2u32, twice_wide),
78-
)?;
79-
80-
// Narrow back to the original type
81-
let res = this.int_to_int_or_float(&divided, dest.layout)?;
82-
this.write_immediate(*res, &dest)?;
83-
}
84-
}
8538
// Used to implement the _mm_madd_epi16 function.
8639
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
8740
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
@@ -118,70 +71,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
11871
this.write_scalar(Scalar::from_i32(res), &dest)?;
11972
}
12073
}
121-
// Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions.
122-
"pmulh.w" | "pmulhu.w" => {
123-
let [left, right] =
124-
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
125-
126-
let (left, left_len) = this.operand_to_simd(left)?;
127-
let (right, right_len) = this.operand_to_simd(right)?;
128-
let (dest, dest_len) = this.place_to_simd(dest)?;
129-
130-
assert_eq!(dest_len, left_len);
131-
assert_eq!(dest_len, right_len);
132-
133-
for i in 0..dest_len {
134-
let left = this.read_immediate(&this.project_index(&left, i)?)?;
135-
let right = this.read_immediate(&this.project_index(&right, i)?)?;
136-
let dest = this.project_index(&dest, i)?;
137-
138-
// Widen the operands to avoid overflow
139-
let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
140-
let left = this.int_to_int_or_float(&left, twice_wide)?;
141-
let right = this.int_to_int_or_float(&right, twice_wide)?;
142-
143-
// Multiply
144-
let multiplied = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
145-
// Keep the high half
146-
let high = this.wrapping_binary_op(
147-
mir::BinOp::Shr,
148-
&multiplied,
149-
&ImmTy::from_uint(dest.layout.size.bits(), twice_wide),
150-
)?;
151-
152-
// Narrow back to the original type
153-
let res = this.int_to_int_or_float(&high, dest.layout)?;
154-
this.write_immediate(*res, &dest)?;
155-
}
156-
}
157-
// Used to implement the _mm_mul_epu32 function.
158-
// Multiplies the the low unsigned 32-bit integers from each packed
159-
// 64-bit element and stores the result as 64-bit unsigned integers.
160-
"pmulu.dq" => {
161-
let [left, right] =
162-
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
163-
164-
let (left, left_len) = this.operand_to_simd(left)?;
165-
let (right, right_len) = this.operand_to_simd(right)?;
166-
let (dest, dest_len) = this.place_to_simd(dest)?;
167-
168-
// left and right are u32x4, dest is u64x2
169-
assert_eq!(left_len, 4);
170-
assert_eq!(right_len, 4);
171-
assert_eq!(dest_len, 2);
172-
173-
for i in 0..dest_len {
174-
let op_i = i.checked_mul(2).unwrap();
175-
let left = this.read_scalar(&this.project_index(&left, op_i)?)?.to_u32()?;
176-
let right = this.read_scalar(&this.project_index(&right, op_i)?)?.to_u32()?;
177-
let dest = this.project_index(&dest, i)?;
178-
179-
// The multiplication will not overflow because stripping the
180-
// operands are expanded from 32-bit to 64-bit.
181-
let res = u64::from(left).checked_mul(u64::from(right)).unwrap();
182-
this.write_scalar(Scalar::from_u64(res), &dest)?;
183-
}
184-
}
18574
// Used to implement the _mm_sad_epu8 function.
18675
// Computes the absolute differences of packed unsigned 8-bit integers in `a`
18776
// and `b`, then horizontally sum each consecutive 8 differences to produce
@@ -370,25 +259,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
370259
this.write_scalar(Scalar::from_u64(res), &dest)?;
371260
}
372261
}
373-
// Used to implement the _mm_cvtepi32_ps function.
374-
// Converts packed i32 to packed f32.
375-
// FIXME: Can we get rid of this intrinsic and just use simd_as?
376-
"cvtdq2ps" => {
377-
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
378-
379-
let (op, op_len) = this.operand_to_simd(op)?;
380-
let (dest, dest_len) = this.place_to_simd(dest)?;
381-
382-
assert_eq!(dest_len, op_len);
383-
384-
for i in 0..dest_len {
385-
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_i32()?;
386-
let dest = this.project_index(&dest, i)?;
387-
388-
let res = Scalar::from_f32(Single::from_i128(op.into()).value);
389-
this.write_scalar(res, &dest)?;
390-
}
391-
}
392262
// Used to implement the _mm_cvtps_epi32 and _mm_cvttps_epi32 functions.
393263
// Converts packed f32 to packed i32.
394264
"cvtps2dq" | "cvttps2dq" => {
@@ -652,31 +522,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
652522
};
653523
this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?;
654524
}
655-
// Used to implement the _mm_cvtpd_ps and _mm_cvtps_pd functions.
656-
// Converts packed f32/f64 to packed f64/f32.
657-
"cvtpd2ps" | "cvtps2pd" => {
658-
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
659-
660-
let (op, op_len) = this.operand_to_simd(op)?;
661-
let (dest, dest_len) = this.place_to_simd(dest)?;
662-
663-
// For cvtpd2ps: op is f64x2, dest is f32x4
664-
// For cvtps2pd: op is f32x4, dest is f64x2
665-
// In either case, the two first values are converted
666-
for i in 0..op_len.min(dest_len) {
667-
let op = this.read_immediate(&this.project_index(&op, i)?)?;
668-
let dest = this.project_index(&dest, i)?;
669-
670-
let res = this.float_to_float_or_int(&op, dest.layout)?;
671-
this.write_immediate(*res, &dest)?;
672-
}
673-
// For f32 -> f64, ignore the remaining
674-
// For f64 -> f32, fill the remaining with zeros
675-
for i in op_len..dest_len {
676-
let dest = this.project_index(&dest, i)?;
677-
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
678-
}
679-
}
680525
// Used to implement the _mm_cvtpd_epi32 and _mm_cvttpd_epi32 functions.
681526
// Converts packed f64 to packed i32.
682527
"cvtpd2dq" | "cvttpd2dq" => {
@@ -772,25 +617,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
772617
)?;
773618
}
774619
}
775-
// Used to implement the _mm_movemask_pd function.
776-
// Returns a scalar integer where the i-th bit is the highest
777-
// bit of the i-th component of `op`.
778-
// https://www.felixcloutier.com/x86/movmskpd
779-
"movmsk.pd" => {
780-
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
781-
let (op, op_len) = this.operand_to_simd(op)?;
782-
783-
let mut res = 0;
784-
for i in 0..op_len {
785-
let op = this.read_scalar(&this.project_index(&op, i)?)?;
786-
let op = op.to_u64()?;
787-
788-
// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
789-
res |= (op >> 63) << i;
790-
}
791-
792-
this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
793-
}
794620
// Used to implement the `_mm_pause` function.
795621
// The intrinsic is used to hint the processor that the code is in a spin-loop.
796622
"pause" => {

src/tools/miri/src/shims/x86/sse3.rs

-26
Original file line numberDiff line numberDiff line change
@@ -22,32 +22,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
2222
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap();
2323

2424
match unprefixed_name {
25-
// Used to implement the _mm_addsub_ps and _mm_addsub_pd functions.
26-
// Alternatingly add and subtract floating point (f32 or f64) from
27-
// `left` and `right`
28-
"addsub.ps" | "addsub.pd" => {
29-
let [left, right] =
30-
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
31-
32-
let (left, left_len) = this.operand_to_simd(left)?;
33-
let (right, right_len) = this.operand_to_simd(right)?;
34-
let (dest, dest_len) = this.place_to_simd(dest)?;
35-
36-
assert_eq!(dest_len, left_len);
37-
assert_eq!(dest_len, right_len);
38-
39-
for i in 0..dest_len {
40-
let left = this.read_immediate(&this.project_index(&left, i)?)?;
41-
let right = this.read_immediate(&this.project_index(&right, i)?)?;
42-
let dest = this.project_index(&dest, i)?;
43-
44-
// Even elements are subtracted and odd elements are added.
45-
let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add };
46-
let res = this.wrapping_binary_op(op, &left, &right)?;
47-
48-
this.write_immediate(*res, &dest)?;
49-
}
50-
}
5125
// Used to implement the _mm_h{add,sub}_p{s,d} functions.
5226
// Horizontally add/subtract adjacent floating point values
5327
// in `left` and `right`.

src/tools/miri/tests/pass/intrinsics-x86-sse2.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,12 @@ mod tests {
117117
#[target_feature(enable = "sse2")]
118118
unsafe fn test_mm_sad_epu8() {
119119
#[rustfmt::skip]
120-
let a = _mm_setr_epi8(
121-
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
122-
1, 2, 3, 4,
123-
155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
124-
1, 2, 3, 4,
125-
);
120+
let a = _mm_setr_epi8(
121+
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
122+
1, 2, 3, 4,
123+
155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
124+
1, 2, 3, 4,
125+
);
126126
let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
127127
let r = _mm_sad_epu8(a, b);
128128
let e = _mm_setr_epi64x(1020, 614);

0 commit comments

Comments
 (0)