@@ -538,3 +538,125 @@ util.func public @conv_nchw_extf_both(%arg0 : tensor<1x5x10x10xf16>,
 // CHECK-SAME: %[[ARG1:.+]]: tensor<5x5x3x3xf16>
 // CHECK: %[[RESULT:.+]] = linalg.conv_2d_nchw_fchw {{.*}} ins(%[[ARG0]], %[[ARG1]]
 // CHECK: util.return %[[RESULT]]
+
+// -----
+
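+// Sign-extension (extsi) of the RHS operand alone: the producing generic is
+// expected to fold away, leaving the matmul to consume the raw i16 input.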
+util.func public @matmul_extsi(%arg0 : tensor<10x20xi32>,
+    %arg1 : tensor<20x40xi16>) -> tensor<10x40xi32> {
+  %0 = tensor.empty() : tensor<20x40xi32>
+  %1 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%arg1 : tensor<20x40xi16>) outs(%0 : tensor<20x40xi32>) {
+    ^bb0(%b0 : i16, %b1 : i32):
+      %e = arith.extsi %b0 : i16 to i32
+      linalg.yield %e : i32
+  } -> tensor<20x40xi32>
+  %2 = tensor.empty() : tensor<10x40xi32>
+  %3 = arith.constant 0 : i32
+  %4 = linalg.fill ins(%3 : i32) outs(%2 : tensor<10x40xi32>) -> tensor<10x40xi32>
+  %5 = linalg.matmul ins(%arg0, %1 : tensor<10x20xi32>, tensor<20x40xi32>)
+      outs(%4 : tensor<10x40xi32>) -> tensor<10x40xi32>
+  util.return %5 : tensor<10x40xi32>
+}
+// CHECK-LABEL: util.func public @matmul_extsi
+// CHECK-SAME: %[[ARG0:.+]]: tensor<10x20xi32>
+// CHECK-SAME: %[[ARG1:.+]]: tensor<20x40xi16>
+// CHECK: %[[RESULT:.+]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]]
+// CHECK: util.return %[[RESULT]]
+
+// -----
+
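+// Same folding with the sign-extension on the LHS operand instead.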
+util.func public @matmul_extsi_a(%arg0 : tensor<10x20xi16>,
+    %arg1 : tensor<20x40xi32>) -> tensor<10x40xi32> {
+  %0 = tensor.empty() : tensor<10x20xi32>
+  %1 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%arg0 : tensor<10x20xi16>) outs(%0 : tensor<10x20xi32>) {
+    ^bb0(%b0 : i16, %b1 : i32):
+      %e = arith.extsi %b0 : i16 to i32
+      linalg.yield %e : i32
+  } -> tensor<10x20xi32>
+  %2 = tensor.empty() : tensor<10x40xi32>
+  %3 = arith.constant 0 : i32
+  %4 = linalg.fill ins(%3 : i32) outs(%2 : tensor<10x40xi32>) -> tensor<10x40xi32>
+  %5 = linalg.matmul ins(%1, %arg1 : tensor<10x20xi32>, tensor<20x40xi32>)
+      outs(%4 : tensor<10x40xi32>) -> tensor<10x40xi32>
+  util.return %5 : tensor<10x40xi32>
+}
+// CHECK-LABEL: util.func public @matmul_extsi_a
+// CHECK-SAME: %[[ARG0:.+]]: tensor<10x20xi16>
+// CHECK-SAME: %[[ARG1:.+]]: tensor<20x40xi32>
+// CHECK: %[[RESULT:.+]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]]
+// CHECK: util.return %[[RESULT]]
+
+// -----
+
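+// Sign-extensions on both operands should fold, so the matmul reads both
+// i16 tensors directly.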
+util.func public @matmul_extsi_both(%arg0 : tensor<10x20xi16>,
+    %arg1 : tensor<20x40xi16>) -> tensor<10x40xi32> {
+  %0 = tensor.empty() : tensor<10x20xi32>
+  %1 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%arg0 : tensor<10x20xi16>) outs(%0 : tensor<10x20xi32>) {
+    ^bb0(%b0 : i16, %b1 : i32):
+      %e = arith.extsi %b0 : i16 to i32
+      linalg.yield %e : i32
+  } -> tensor<10x20xi32>
+  %2 = tensor.empty() : tensor<20x40xi32>
+  %3 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%arg1 : tensor<20x40xi16>) outs(%2 : tensor<20x40xi32>) {
+    ^bb0(%b2 : i16, %b3 : i32):
+      %e1 = arith.extsi %b2 : i16 to i32
+      linalg.yield %e1 : i32
+  } -> tensor<20x40xi32>
+  %4 = tensor.empty() : tensor<10x40xi32>
+  %5 = arith.constant 0 : i32
+  %6 = linalg.fill ins(%5 : i32) outs(%4 : tensor<10x40xi32>) -> tensor<10x40xi32>
+  %7 = linalg.matmul ins(%1, %3 : tensor<10x20xi32>, tensor<20x40xi32>)
+      outs(%6 : tensor<10x40xi32>) -> tensor<10x40xi32>
+  util.return %7 : tensor<10x40xi32>
+}
+// CHECK-LABEL: util.func public @matmul_extsi_both
+// CHECK-SAME: %[[ARG0:.+]]: tensor<10x20xi16>
+// CHECK-SAME: %[[ARG1:.+]]: tensor<20x40xi16>
+// CHECK: %[[RESULT:.+]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]]
+// CHECK: util.return %[[RESULT]]
+
+// -----
+
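+// The same folding applies to convolutions: both extsi producers fold into
+// linalg.conv_2d_nchw_fchw.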
+util.func public @conv_nchw_extsi_both(%arg0 : tensor<1x5x10x10xi16>,
+    %arg1 : tensor<5x5x3x3xi16>) -> tensor<1x5x8x8xi32> {
+  %0 = tensor.empty() : tensor<1x5x10x10xi32>
+  %1 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
+      iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+      ins(%arg0 : tensor<1x5x10x10xi16>) outs(%0 : tensor<1x5x10x10xi32>) {
+    ^bb0(%b0 : i16, %b1 : i32):
+      %e = arith.extsi %b0 : i16 to i32
+      linalg.yield %e : i32
+  } -> tensor<1x5x10x10xi32>
+  %2 = tensor.empty() : tensor<5x5x3x3xi32>
+  %3 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
+      iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+      ins(%arg1 : tensor<5x5x3x3xi16>) outs(%2 : tensor<5x5x3x3xi32>) {
+    ^bb0(%b2 : i16, %b3 : i32):
+      %e1 = arith.extsi %b2 : i16 to i32
+      linalg.yield %e1 : i32
+  } -> tensor<5x5x3x3xi32>
+  %4 = tensor.empty() : tensor<1x5x8x8xi32>
+  %5 = arith.constant 0 : i32
+  %6 = linalg.fill ins(%5 : i32) outs(%4 : tensor<1x5x8x8xi32>) -> tensor<1x5x8x8xi32>
+  %7 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>}
+      ins(%1, %3 : tensor<1x5x10x10xi32>, tensor<5x5x3x3xi32>)
+      outs(%6 : tensor<1x5x8x8xi32>) -> tensor<1x5x8x8xi32>
+  util.return %7 : tensor<1x5x8x8xi32>
+}
+// CHECK-LABEL: util.func public @conv_nchw_extsi_both
+// CHECK-SAME: %[[ARG0:.+]]: tensor<1x5x10x10xi16>
+// CHECK-SAME: %[[ARG1:.+]]: tensor<5x5x3x3xi16>
+// CHECK: %[[RESULT:.+]] = linalg.conv_2d_nchw_fchw {{.*}} ins(%[[ARG0]], %[[ARG1]]
+// CHECK: util.return %[[RESULT]]