Skip to content

Commit 07eba98

Browse files
committed
[AArch64][SVE2] Asm: implement CDOT instruction
Summary: The complex DOT instructions perform a dot-product on quad-tuplets from two source vectors and the resulting wide real or wide imaginary result is accumulated into the destination register. The instructions come in two forms: Vector form, e.g. cdot z0.s, z1.b, z2.b, #90 - complex dot product on four 8-bit quad-tuplets, accumulating results in 32-bit elements. The complex numbers in the second source vector are rotated by 90 degrees. cdot z0.d, z1.h, z2.h, #180 - complex dot product on four 16-bit quad-tuplets, accumulating results in 64-bit elements. The complex numbers in the second source vector are rotated by 180 degrees. Indexed form, e.g. cdot z0.s, z1.b, z2.b[3], #0 - complex dot product on four 8-bit quad-tuplets, with specified quad-tuplet from second source vector, accumulating results in 32-bit elements. cdot z0.d, z1.h, z2.h[1], #0 - complex dot product on four 16-bit quad-tuplets, with specified quad-tuplet from second source vector, accumulating results in 64-bit elements. The specification can be found here: https://developer.arm.com/docs/ddi0602/latest Reviewed By: SjoerdMeijer, rovka Differential Revision: https://reviews.llvm.org/D61903 llvm-svn: 360870
1 parent 3cbf3c8 commit 07eba98

File tree

4 files changed

+278
-0
lines changed

4 files changed

+278
-0
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

+6
Original file line numberDiff line numberDiff line change
@@ -1051,4 +1051,10 @@ let Predicates = [HasSVE2] in {
10511051
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">;
10521052
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">;
10531053
def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
1054+
1055+
// SVE2 complex integer dot product (indexed)
1056+
defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
1057+
1058+
// SVE2 complex integer dot product
1059+
defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
10541060
}

llvm/lib/Target/AArch64/SVEInstrFormats.td

+73
Original file line numberDiff line numberDiff line change
@@ -1837,6 +1837,79 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
18371837
}
18381838
}
18391839

1840+
//===----------------------------------------------------------------------===//
1841+
// SVE2 Complex Integer Dot Product Group
1842+
//===----------------------------------------------------------------------===//
1843+
1844+
// Encoding/assembly template for the vector (non-indexed) form:
//   <asm> $Zda, $Zn, $Zm, $rot
//
// Template parameters:
//   sz     - 2-bit size field, placed in Inst{23-22}.
//   opc    - 4-bit opcode field, placed in Inst{15-12}.
//   asm    - mnemonic string.
//   zprty1 - register operand class of the accumulator/destination ($Zda).
//   zprty2 - register operand class shared by both sources ($Zn, $Zm).
//
// $Zda is both read and written: the input operand $_Zda is tied to the
// output $Zda through the Constraints string below.
class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
1845+
ZPRRegOp zprty1, ZPRRegOp zprty2>
1846+
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm,
1847+
complexrotateop:$rot),
1848+
asm, "\t$Zda, $Zn, $Zm, $rot", "", []>, Sched<[]> {
1849+
bits<5> Zda;
1850+
bits<5> Zn;
1851+
bits<5> Zm;
1852+
bits<2> rot; // 2-bit encoded rotation operand
1853+
let Inst{31-24} = 0b01000100;
1854+
let Inst{23-22} = sz;
1855+
let Inst{21} = 0b0; // 0 here; the indexed class sets this bit to 1
1856+
let Inst{20-16} = Zm; // full 5-bit register number
1857+
let Inst{15-12} = opc;
1858+
let Inst{11-10} = rot;
1859+
let Inst{9-5} = Zn;
1860+
let Inst{4-0} = Zda;
1861+
1862+
// Tie the accumulator input to the destination register.
let Constraints = "$Zda = $_Zda";
1863+
// NOTE(review): these two fields are presumably what the movprfx
// validation consults (the tests accept only an unpredicated movprfx
// before cdot) - confirm against the assembler's movprfx checks.
let DestructiveInstType = Destructive;
1864+
let ElementSize = ElementSizeNone;
1865+
}
1866+
1867+
// Instantiates the two vector-form variants of sve2_complex_int_arith,
// both with opc = 0b0001:
//   _S : sz = 0b10, ZPR32 accumulator, ZPR8 sources.
//   _D : sz = 0b11, ZPR64 accumulator, ZPR16 sources.
multiclass sve2_cintx_dot<string asm> {
1868+
def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
1869+
def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
1870+
}
1871+
1872+
//===----------------------------------------------------------------------===//
1873+
// SVE2 Complex Integer Dot Product - Indexed Group
1874+
//===----------------------------------------------------------------------===//
1875+
1876+
// Encoding/assembly template for the indexed form:
//   <asm> $Zda, $Zn, $Zm$iop, $rot
//
// Template parameters:
//   sz     - 2-bit size field, placed in Inst{23-22}.
//   opc    - 4-bit opcode field, placed in Inst{15-12}.
//   asm    - mnemonic string.
//   zprty1 - register operand class of the accumulator/destination ($Zda).
//   zprty2 - register operand class of the first source ($Zn).
//   zprty3 - register operand class of the indexed second source ($Zm).
//   itype  - operand class of the element index ($iop).
//
// This class deliberately does not declare Zm or iop and leaves Inst{20-16}
// unassigned: the split of those five bits between index and register
// number differs per element size, so each def in
// sve2_cintx_dot_by_indexed_elem supplies it.
class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
1877+
ZPRRegOp zprty1, ZPRRegOp zprty2,
1878+
ZPRRegOp zprty3, Operand itype>
1879+
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop,
1880+
complexrotateop:$rot),
1881+
asm, "\t$Zda, $Zn, $Zm$iop, $rot", "", []>, Sched<[]> {
1882+
bits<5> Zda;
1883+
bits<5> Zn;
1884+
bits<2> rot; // 2-bit encoded rotation operand
1885+
let Inst{31-24} = 0b01000100;
1886+
let Inst{23-22} = sz;
1887+
let Inst{21} = 0b1; // 1 here; the non-indexed class sets this bit to 0
1888+
let Inst{15-12} = opc;
1889+
let Inst{11-10} = rot;
1890+
let Inst{9-5} = Zn;
1891+
let Inst{4-0} = Zda;
1892+
1893+
// Tie the accumulator input to the destination register.
let Constraints = "$Zda = $_Zda";
1894+
// NOTE(review): these two fields are presumably what the movprfx
// validation consults (the tests accept only an unpredicated movprfx
// before cdot) - confirm against the assembler's movprfx checks.
let DestructiveInstType = Destructive;
1895+
let ElementSize = ElementSizeNone;
1896+
}
1897+
1898+
// Instantiates the two indexed-form variants of
// sve2_complex_int_arith_indexed, both with opc = 0b0100, and fills in
// Inst{20-16}, which the base class leaves open:
//   _S : sz = 0b10, ZPR32 accumulator, ZPR8 first source, ZPR3b8 indexed
//        source; 2-bit index in Inst{20-19}, 3-bit Zm in Inst{18-16}.
//   _D : sz = 0b11, ZPR64 accumulator, ZPR16 first source, ZPR4b16 indexed
//        source; 1-bit index in Inst{20}, 4-bit Zm in Inst{19-16}.
multiclass sve2_cintx_dot_by_indexed_elem<string asm> {
1899+
def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
1900+
bits<2> iop;
1901+
bits<3> Zm; // only 3 bits of register number are encoded
1902+
let Inst{20-19} = iop;
1903+
let Inst{18-16} = Zm;
1904+
}
1905+
def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
1906+
bit iop;
1907+
bits<4> Zm; // only 4 bits of register number are encoded
1908+
let Inst{20} = iop;
1909+
let Inst{19-16} = Zm;
1910+
}
1911+
}
1912+
18401913
//===----------------------------------------------------------------------===//
18411914
// SVE2 Integer Multiply - Unpredicated Group
18421915
//===----------------------------------------------------------------------===//
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
2+
3+
4+
// ------------------------------------------------------------------------- //
5+
// Invalid element size
6+
7+
cdot z0.s, z1.h, z31.h, #0
8+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
9+
// CHECK-NEXT: cdot z0.s, z1.h, z31.h, #0
10+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
11+
12+
cdot z0.s, z1.s, z31.s, #0
13+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
14+
// CHECK-NEXT: cdot z0.s, z1.s, z31.s, #0
15+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
16+
17+
cdot z0.s, z1.d, z31.d, #0
18+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
19+
// CHECK-NEXT: cdot z0.s, z1.d, z31.d, #0
20+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
21+
22+
cdot z0.d, z1.b, z31.b, #0
23+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
24+
// CHECK-NEXT: cdot z0.d, z1.b, z31.b, #0
25+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
26+
27+
cdot z0.d, z1.s, z31.s, #0
28+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
29+
// CHECK-NEXT: cdot z0.d, z1.s, z31.s, #0
30+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
31+
32+
cdot z0.d, z1.d, z31.d, #0
33+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
34+
// CHECK-NEXT: cdot z0.d, z1.d, z31.d, #0
35+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
36+
37+
38+
// ------------------------------------------------------------------------- //
39+
// Invalid restricted register for indexed vector.
40+
41+
cdot z0.s, z1.b, z8.b[3], #0
42+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
43+
// CHECK-NEXT: cdot z0.s, z1.b, z8.b[3], #0
44+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
45+
46+
cdot z0.d, z1.h, z16.h[1], #0
47+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
48+
// CHECK-NEXT: cdot z0.d, z1.h, z16.h[1], #0
49+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
50+
51+
52+
// ------------------------------------------------------------------------- //
53+
// Invalid element index
54+
55+
cdot z0.s, z1.b, z7.b[-1], #0
56+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
57+
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[-1], #0
58+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
59+
60+
cdot z0.s, z1.b, z7.b[4], #0
61+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
62+
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[4], #0
63+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
64+
65+
cdot z0.d, z1.h, z15.h[-1], #0
66+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
67+
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[-1], #0
68+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
69+
70+
cdot z0.d, z1.h, z15.h[2], #0
71+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
72+
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[2], #0
73+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
74+
75+
76+
// --------------------------------------------------------------------------//
77+
// Invalid rotation
78+
79+
cdot z0.s, z1.b, z2.b[0], #360
80+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
81+
// CHECK-NEXT: cdot z0.s, z1.b, z2.b[0], #360
82+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
83+
84+
cdot z0.d, z1.h, z2.h[0], #450
85+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
86+
// CHECK-NEXT: cdot z0.d, z1.h, z2.h[0], #450
87+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
88+
89+
90+
// --------------------------------------------------------------------------//
91+
// Negative tests for instructions that are incompatible with movprfx
92+
93+
movprfx z0.d, p0/z, z7.d
94+
cdot z0.d, z1.h, z31.h, #0
95+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
96+
// CHECK-NEXT: cdot z0.d, z1.h, z31.h, #0
97+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
98+
99+
movprfx z0.d, p0/z, z7.d
100+
cdot z0.d, z1.h, z15.h[1], #0
101+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
102+
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[1], #0
103+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

llvm/test/MC/AArch64/SVE2/cdot.s

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
2+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
3+
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
4+
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
5+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
6+
// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
7+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
8+
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
9+
10+
cdot z0.s, z1.b, z31.b, #0
11+
// CHECK-INST: cdot z0.s, z1.b, z31.b, #0
12+
// CHECK-ENCODING: [0x20,0x10,0x9f,0x44]
13+
// CHECK-ERROR: instruction requires: sve2
14+
// CHECK-UNKNOWN: 20 10 9f 44 <unknown>
15+
16+
cdot z0.d, z1.h, z31.h, #0
17+
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0
18+
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44]
19+
// CHECK-ERROR: instruction requires: sve2
20+
// CHECK-UNKNOWN: 20 10 df 44 <unknown>
21+
22+
cdot z0.d, z1.h, z31.h, #90
23+
// CHECK-INST: cdot z0.d, z1.h, z31.h, #90
24+
// CHECK-ENCODING: [0x20,0x14,0xdf,0x44]
25+
// CHECK-ERROR: instruction requires: sve2
26+
// CHECK-UNKNOWN: 20 14 df 44 <unknown>
27+
28+
cdot z0.d, z1.h, z31.h, #180
29+
// CHECK-INST: cdot z0.d, z1.h, z31.h, #180
30+
// CHECK-ENCODING: [0x20,0x18,0xdf,0x44]
31+
// CHECK-ERROR: instruction requires: sve2
32+
// CHECK-UNKNOWN: 20 18 df 44 <unknown>
33+
34+
cdot z0.d, z1.h, z31.h, #270
35+
// CHECK-INST: cdot z0.d, z1.h, z31.h, #270
36+
// CHECK-ENCODING: [0x20,0x1c,0xdf,0x44]
37+
// CHECK-ERROR: instruction requires: sve2
38+
// CHECK-UNKNOWN: 20 1c df 44 <unknown>
39+
40+
cdot z0.s, z1.b, z7.b[3], #0
41+
// CHECK-INST: cdot z0.s, z1.b, z7.b[3], #0
42+
// CHECK-ENCODING: [0x20,0x40,0xbf,0x44]
43+
// CHECK-ERROR: instruction requires: sve2
44+
// CHECK-UNKNOWN: 20 40 bf 44 <unknown>
45+
46+
cdot z0.d, z1.h, z15.h[1], #0
47+
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0
48+
// CHECK-ENCODING: [0x20,0x40,0xff,0x44]
49+
// CHECK-ERROR: instruction requires: sve2
50+
// CHECK-UNKNOWN: 20 40 ff 44 <unknown>
51+
52+
cdot z5.d, z6.h, z3.h[0], #90
53+
// CHECK-INST: cdot z5.d, z6.h, z3.h[0], #90
54+
// CHECK-ENCODING: [0xc5,0x44,0xe3,0x44]
55+
// CHECK-ERROR: instruction requires: sve2
56+
// CHECK-UNKNOWN: c5 44 e3 44 <unknown>
57+
58+
cdot z29.d, z30.h, z0.h[0], #180
59+
// CHECK-INST: cdot z29.d, z30.h, z0.h[0], #180
60+
// CHECK-ENCODING: [0xdd,0x4b,0xe0,0x44]
61+
// CHECK-ERROR: instruction requires: sve2
62+
// CHECK-UNKNOWN: dd 4b e0 44 <unknown>
63+
64+
cdot z31.d, z30.h, z7.h[1], #270
65+
// CHECK-INST: cdot z31.d, z30.h, z7.h[1], #270
66+
// CHECK-ENCODING: [0xdf,0x4f,0xf7,0x44]
67+
// CHECK-ERROR: instruction requires: sve2
68+
// CHECK-UNKNOWN: df 4f f7 44 <unknown>
69+
70+
71+
// --------------------------------------------------------------------------//
72+
// Test compatibility with MOVPRFX instruction.
73+
74+
movprfx z0, z7
75+
// CHECK-INST: movprfx z0, z7
76+
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
77+
// CHECK-ERROR: instruction requires: sve
78+
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
79+
80+
cdot z0.d, z1.h, z31.h, #0
81+
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0
82+
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44]
83+
// CHECK-ERROR: instruction requires: sve2
84+
// CHECK-UNKNOWN: 20 10 df 44 <unknown>
85+
86+
movprfx z0, z7
87+
// CHECK-INST: movprfx z0, z7
88+
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
89+
// CHECK-ERROR: instruction requires: sve
90+
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
91+
92+
cdot z0.d, z1.h, z15.h[1], #0
93+
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0
94+
// CHECK-ENCODING: [0x20,0x40,0xff,0x44]
95+
// CHECK-ERROR: instruction requires: sve2
96+
// CHECK-UNKNOWN: 20 40 ff 44 <unknown>

0 commit comments

Comments
 (0)