We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent cd7218a, commit a5fcfad (Copy full SHA for a5fcfad)
src/intrinsics/llvm_x86.rs
@@ -364,9 +364,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
364
for out_lane_idx in 0..lane_count / 8 {
365
let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
366
367
- for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 {
+ for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 {
368
let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
369
+ let a_lane = fx.bcx.ins().uextend(types::I16, a_lane);
370
let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
371
+ let b_lane = fx.bcx.ins().uextend(types::I16, b_lane);
372
373
let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
374
let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
0 commit comments