@@ -494,6 +494,160 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
494
494
}
495
495
}
496
496
497
+ "llvm.x86.avx2.packuswb" => {
498
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16&ig_expand=4906
499
+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
500
+
501
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
502
+ let layout = a. layout ( ) ;
503
+
504
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
505
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
506
+ assert_eq ! ( lane_ty, fx. tcx. types. i16 ) ;
507
+ assert_eq ! ( ret_lane_ty, fx. tcx. types. u8 ) ;
508
+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
509
+
510
+ let zero = fx. bcx . ins ( ) . iconst ( types:: I16 , 0 ) ;
511
+ let max_u8 = fx. bcx . ins ( ) . iconst ( types:: I16 , 255 ) ;
512
+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . u8 ) ;
513
+
514
+ for idx in 0 ..lane_count / 2 {
515
+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
516
+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
517
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
518
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
519
+
520
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
521
+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
522
+ }
523
+
524
+ for idx in 0 ..lane_count / 2 {
525
+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
526
+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
527
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
528
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
529
+
530
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
531
+ ret. place_lane ( fx, lane_count / 2 + idx) . write_cvalue ( fx, res_lane) ;
532
+ }
533
+
534
+ for idx in 0 ..lane_count / 2 {
535
+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
536
+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
537
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
538
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
539
+
540
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
541
+ ret. place_lane ( fx, lane_count / 2 * 2 + idx) . write_cvalue ( fx, res_lane) ;
542
+ }
543
+
544
+ for idx in 0 ..lane_count / 2 {
545
+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
546
+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
547
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
548
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
549
+
550
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
551
+ ret. place_lane ( fx, lane_count / 2 * 3 + idx) . write_cvalue ( fx, res_lane) ;
552
+ }
553
+ }
554
+
555
+ "llvm.x86.sse2.packssdw.128" => {
556
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889
557
+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
558
+
559
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
560
+ let layout = a. layout ( ) ;
561
+
562
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
563
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
564
+ assert_eq ! ( lane_ty, fx. tcx. types. i32 ) ;
565
+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i16 ) ;
566
+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
567
+
568
+ let min_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MIN as u16 ) ) ;
569
+ let max_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MAX as u16 ) ) ;
570
+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i16 ) ;
571
+
572
+ for idx in 0 ..lane_count {
573
+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
574
+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
575
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
576
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
577
+
578
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
579
+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
580
+ }
581
+
582
+ for idx in 0 ..lane_count {
583
+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
584
+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
585
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
586
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
587
+
588
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
589
+ ret. place_lane ( fx, lane_count + idx) . write_cvalue ( fx, res_lane) ;
590
+ }
591
+ }
592
+
593
+ "llvm.x86.avx2.packssdw" => {
594
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32&ig_expand=4892
595
+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
596
+
597
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
598
+ let layout = a. layout ( ) ;
599
+
600
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
601
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
602
+ assert_eq ! ( lane_ty, fx. tcx. types. i32 ) ;
603
+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i16 ) ;
604
+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
605
+
606
+ let min_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MIN as u16 ) ) ;
607
+ let max_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MAX as u16 ) ) ;
608
+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i16 ) ;
609
+
610
+ for idx in 0 ..lane_count / 2 {
611
+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
612
+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
613
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
614
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
615
+
616
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
617
+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
618
+ }
619
+
620
+ for idx in 0 ..lane_count / 2 {
621
+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
622
+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
623
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
624
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
625
+
626
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
627
+ ret. place_lane ( fx, lane_count / 2 + idx) . write_cvalue ( fx, res_lane) ;
628
+ }
629
+
630
+ for idx in 0 ..lane_count / 2 {
631
+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
632
+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
633
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
634
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
635
+
636
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
637
+ ret. place_lane ( fx, lane_count / 2 * 2 + idx) . write_cvalue ( fx, res_lane) ;
638
+ }
639
+
640
+ for idx in 0 ..lane_count / 2 {
641
+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
642
+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
643
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
644
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
645
+
646
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
647
+ ret. place_lane ( fx, lane_count / 2 * 3 + idx) . write_cvalue ( fx, res_lane) ;
648
+ }
649
+ }
650
+
497
651
_ => {
498
652
fx. tcx
499
653
. sess
0 commit comments