Commit 06be70e
Authored Dec 15, 2022
Implement simd_gather and simd_scatter (rust-lang#1309)
These are the last remaining platform intrinsics necessary for portable-simd.
1 parent: 0865e5a
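
For context (not part of the commit itself): with these two intrinsics in place, the gather/scatter API of nightly std::simd should be usable under cg_clif. A minimal sketch, assuming the portable-simd wrappers Simd::gather_or_default and Simd::scatter are what lower to simd_gather/simd_scatter here:

#![feature(portable_simd)]
use std::simd::Simd;

fn main() {
    let data = [10i32, 11, 12, 13, 14, 15, 16, 17];
    let idxs = Simd::from_array([7usize, 0, 3, 3]);

    // Gather: each lane loads data[idxs[lane]]; lanes that are masked
    // off or out of bounds take the fallback value (here, Default).
    let gathered = Simd::gather_or_default(&data, idxs);
    assert_eq!(gathered.to_array(), [17, 10, 13, 13]);

    // Scatter: each enabled, in-bounds lane stores its value to
    // out[idxs[lane]].
    let mut out = [0i32; 8];
    gathered.scatter(&mut out, idxs);
    assert_eq!(out, [10, 0, 0, 13, 0, 0, 0, 17]);
}

Lanes disabled by a mask take the pass-through value on gather and skip the store on scatter, which is exactly the branchy per-lane behavior the commit emits below.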

File tree

2 files changed (+74, -37 lines)

patches/0001-portable-simd-Disable-unsupported-tests.patch (-35 lines)

This file was deleted.

src/intrinsics/simd.rs (+74, -2 lines)

@@ -801,8 +801,80 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             }
         }
 
-        // simd_scatter
-        // simd_gather
+        sym::simd_gather => {
+            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+
+            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, ptr_lane_count);
+            assert_eq!(val_lane_count, mask_lane_count);
+            assert_eq!(val_lane_count, ret_lane_count);
+
+            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+            for lane_idx in 0..ptr_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let if_disabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+                fx.bcx.ins().brnz(mask_lane, if_enabled, &[]);
+                fx.bcx.ins().jump(if_disabled, &[]);
+                fx.bcx.seal_block(if_enabled);
+                fx.bcx.seal_block(if_disabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), ptr_lane, 0);
+                fx.bcx.ins().jump(next, &[res]);
+
+                fx.bcx.switch_to_block(if_disabled);
+                fx.bcx.ins().jump(next, &[val_lane]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+
+                ret.place_lane(fx, lane_idx)
+                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+            }
+        }
+
+        sym::simd_scatter => {
+            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+
+            let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, ptr_lane_count);
+            assert_eq!(val_lane_count, mask_lane_count);
+
+            for lane_idx in 0..ptr_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+
+                fx.bcx.ins().brnz(mask_lane, if_enabled, &[]);
+                fx.bcx.ins().jump(next, &[]);
+                fx.bcx.seal_block(if_enabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                fx.bcx.ins().store(MemFlags::trusted(), val_lane, ptr_lane, 0);
+                fx.bcx.ins().jump(next, &[]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+            }
+        }
+
         _ => {
             fx.tcx.sess.span_fatal(span, &format!("Unknown SIMD intrinsic {}", intrinsic));
         }
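
The gather arm builds a small branch diamond per lane: brnz on the mask bit, a load in if_enabled, and a block parameter on next acting as the phi that merges the loaded value with the pass-through lane from val. A hedged scalar model of that control flow (gather_model is illustrative, not part of the commit):

/// Scalar model of the per-lane lowering of simd_gather above.
/// `val` supplies pass-through lanes, `ptr` holds one pointer per
/// lane, and `mask` gates which lanes actually load.
unsafe fn gather_model<T: Copy, const N: usize>(
    val: [T; N],
    ptr: [*const T; N],
    mask: [bool; N],
) -> [T; N] {
    let mut ret = val; // disabled lanes keep the pass-through value
    for lane in 0..N {
        if mask[lane] {
            // corresponds to the load in the if_enabled block
            ret[lane] = *ptr[lane];
        }
        // the if_disabled path just forwards val[lane] to next
    }
    ret
}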

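The scatter arm is the store-side mirror and needs no merge block parameter: enabled lanes store through their pointer, disabled lanes fall straight through to next. The equivalent scalar sketch (again illustrative only):

/// Scalar model of the per-lane lowering of simd_scatter above.
unsafe fn scatter_model<T: Copy, const N: usize>(
    val: [T; N],
    ptr: [*mut T; N],
    mask: [bool; N],
) {
    for lane in 0..N {
        if mask[lane] {
            // corresponds to the store in the if_enabled block
            *ptr[lane] = val[lane];
        }
        // disabled lanes do nothing, like the jump straight to next
    }
}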