Skip to content

Commit a5fcfad

Browse files
committed
Fix _mm_sad_epu8 implementation
Fixes #1426
1 parent cd7218a commit a5fcfad

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

src/intrinsics/llvm_x86.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -364,9 +364,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             for out_lane_idx in 0..lane_count / 8 {
                 let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
 
-                for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 {
+                for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 {
                     let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
+                    let a_lane = fx.bcx.ins().uextend(types::I16, a_lane);
                     let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
+                    let b_lane = fx.bcx.ins().uextend(types::I16, b_lane);
 
                     let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
                     let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);

0 commit comments

Comments
 (0)