-
Notifications
You must be signed in to change notification settings - Fork 754
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64][SVE2] Asm: implement CDOT instruction
Summary: The complex DOT instructions perform a dot-product on quad-tuplets from two source vectors, and the resulting wide real or wide imaginary value is accumulated into the destination register. The instructions come in two forms. Vector form, e.g.: cdot z0.s, z1.b, z2.b, #90 - complex dot product on four 8-bit quad-tuplets, accumulating results in 32-bit elements; the complex numbers in the second source vector are rotated by 90 degrees. cdot z0.d, z1.h, z2.h, #180 - complex dot product on four 16-bit quad-tuplets, accumulating results in 64-bit elements; the complex numbers in the second source vector are rotated by 180 degrees. Indexed form, e.g.: cdot z0.s, z1.b, z2.b[3], #0 - complex dot product on four 8-bit quad-tuplets, with the specified quad-tuplet from the second source vector, accumulating results in 32-bit elements. cdot z0.d, z1.h, z2.h[1], #0 - complex dot product on four 16-bit quad-tuplets, with the specified quad-tuplet from the second source vector, accumulating results in 64-bit elements. The specification can be found here: https://developer.arm.com/docs/ddi0602/latest Reviewed By: SjoerdMeijer, rovka Differential Revision: https://reviews.llvm.org/D61903 llvm-svn: 360870
- Loading branch information
Showing
4 changed files
with
278 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
// Negative assembler test: llvm-mc is run under `not`, so the command is | ||
// expected to fail. Its stderr is merged into stdout (2>&1) and piped to | ||
// FileCheck, which matches the diagnostics against the directives below. | ||
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s | ||
|
||
|
||
// ------------------------------------------------------------------------- // | ||
// Invalid element size | ||
// Every destination/source element-size pairing in this section is expected | ||
// to be rejected with "invalid element width". The commit summary describes | ||
// the accepted pairings as .s accumulating from .b and .d from .h. | ||
// The diagnostic directives use @LINE-1, so each one must stay on the line | ||
// directly after the instruction it describes. | ||
|
||
cdot z0.s, z1.h, z31.h, #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width | ||
// CHECK-NEXT: cdot z0.s, z1.h, z31.h, #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.s, z1.s, z31.s, #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width | ||
// CHECK-NEXT: cdot z0.s, z1.s, z31.s, #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.s, z1.d, z31.d, #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width | ||
// CHECK-NEXT: cdot z0.s, z1.d, z31.d, #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.d, z1.b, z31.b, #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width | ||
// CHECK-NEXT: cdot z0.d, z1.b, z31.b, #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.d, z1.s, z31.s, #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width | ||
// CHECK-NEXT: cdot z0.d, z1.s, z31.s, #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.d, z1.d, z31.d, #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width | ||
// CHECK-NEXT: cdot z0.d, z1.d, z31.d, #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
|
||
// ------------------------------------------------------------------------- // | ||
// Invalid restricted register for indexed vector. | ||
// The indexed source register is the operand under test: z8 with .b | ||
// elements and z16 with .h elements are both expected to be rejected. | ||
// NOTE(review): presumably the accepted ranges are z0-z7 for .b and z0-z15 | ||
// for .h -- confirm against the instruction specification. | ||
|
||
cdot z0.s, z1.b, z8.b[3], #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction | ||
// CHECK-NEXT: cdot z0.s, z1.b, z8.b[3], #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.d, z1.h, z16.h[1], #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction | ||
// CHECK-NEXT: cdot z0.d, z1.h, z16.h[1], #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
|
||
// ------------------------------------------------------------------------- // | ||
// Invalid element index | ||
// Probes one lane value below and one above the accepted range quoted in | ||
// each expected diagnostic: [0, 3] for .b sources and [0, 1] for .h sources. | ||
|
||
cdot z0.s, z1.b, z7.b[-1], #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. | ||
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[-1], #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.s, z1.b, z7.b[4], #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. | ||
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[4], #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.d, z1.h, z15.h[-1], #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. | ||
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[-1], #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.d, z1.h, z15.h[2], #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. | ||
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[2], #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
|
||
// --------------------------------------------------------------------------// | ||
// Invalid rotation | ||
// #360 and #450 are multiples of 90 that fall outside the accepted set; the | ||
// expected diagnostic lists the only valid rotations (0, 90, 180, 270). | ||
// Both cases use the indexed form, one per element size. | ||
|
||
cdot z0.s, z1.b, z2.b[0], #360 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270. | ||
// CHECK-NEXT: cdot z0.s, z1.b, z2.b[0], #360 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
cdot z0.d, z1.h, z2.h[0], #450 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270. | ||
// CHECK-NEXT: cdot z0.d, z1.h, z2.h[0], #450 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
|
||
// --------------------------------------------------------------------------// | ||
// Negative tests for instructions that are incompatible with movprfx | ||
// Both cdot forms (vector, then indexed) are placed directly after a | ||
// predicated movprfx (p0/z). The expected diagnostic is reported against the | ||
// cdot line, which is why the directives follow cdot and not movprfx. | ||
|
||
movprfx z0.d, p0/z, z7.d | ||
cdot z0.d, z1.h, z31.h, #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx | ||
// CHECK-NEXT: cdot z0.d, z1.h, z31.h, #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: | ||
|
||
movprfx z0.d, p0/z, z7.d | ||
cdot z0.d, z1.h, z15.h[1], #0 | ||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx | ||
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[1], #0 | ||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// Four invocations share this file: | ||
//  1. assemble with +sve2 and check the printed instruction and encoding; | ||
//  2. assemble without +sve2 (expected to fail) and check the error text; | ||
//  3. assemble to an object, disassemble with +sve2, check the instruction; | ||
//  4. assemble to an object, disassemble without +sve2, check the raw bytes. | ||
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \ | ||
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST | ||
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ | ||
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR | ||
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ | ||
// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST | ||
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \ | ||
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN | ||
|
||
// Positive encoding tests. Each instruction is followed by four | ||
// expectations: its printed form, its encoding bytes, the missing-feature | ||
// error, and the raw bytes shown as <unknown> by the disassembler. | ||
// Covered: vector form (.s from .b at #0; .d from .h at every rotation) and | ||
// indexed form (lanes .b[3], .h[0], .h[1]; every rotation across the cases). | ||
cdot z0.s, z1.b, z31.b, #0 | ||
// CHECK-INST: cdot z0.s, z1.b, z31.b, #0 | ||
// CHECK-ENCODING: [0x20,0x10,0x9f,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 10 9f 44 <unknown> | ||
|
||
cdot z0.d, z1.h, z31.h, #0 | ||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0 | ||
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 10 df 44 <unknown> | ||
|
||
cdot z0.d, z1.h, z31.h, #90 | ||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #90 | ||
// CHECK-ENCODING: [0x20,0x14,0xdf,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 14 df 44 <unknown> | ||
|
||
cdot z0.d, z1.h, z31.h, #180 | ||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #180 | ||
// CHECK-ENCODING: [0x20,0x18,0xdf,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 18 df 44 <unknown> | ||
|
||
cdot z0.d, z1.h, z31.h, #270 | ||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #270 | ||
// CHECK-ENCODING: [0x20,0x1c,0xdf,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 1c df 44 <unknown> | ||
|
||
cdot z0.s, z1.b, z7.b[3], #0 | ||
// CHECK-INST: cdot z0.s, z1.b, z7.b[3], #0 | ||
// CHECK-ENCODING: [0x20,0x40,0xbf,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 40 bf 44 <unknown> | ||
|
||
cdot z0.d, z1.h, z15.h[1], #0 | ||
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0 | ||
// CHECK-ENCODING: [0x20,0x40,0xff,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 40 ff 44 <unknown> | ||
|
||
cdot z5.d, z6.h, z3.h[0], #90 | ||
// CHECK-INST: cdot z5.d, z6.h, z3.h[0], #90 | ||
// CHECK-ENCODING: [0xc5,0x44,0xe3,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: c5 44 e3 44 <unknown> | ||
|
||
cdot z29.d, z30.h, z0.h[0], #180 | ||
// CHECK-INST: cdot z29.d, z30.h, z0.h[0], #180 | ||
// CHECK-ENCODING: [0xdd,0x4b,0xe0,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: dd 4b e0 44 <unknown> | ||
|
||
cdot z31.d, z30.h, z7.h[1], #270 | ||
// CHECK-INST: cdot z31.d, z30.h, z7.h[1], #270 | ||
// CHECK-ENCODING: [0xdf,0x4f,0xf7,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: df 4f f7 44 <unknown> | ||
|
||
|
||
// --------------------------------------------------------------------------// | ||
// Test compatibility with MOVPRFX instruction. | ||
// An unpredicated movprfx z0, z7 precedes each cdot form (vector, then | ||
// indexed), and each pair is expected to assemble and encode. | ||
// Note that the movprfx error expectation names "sve" while the cdot | ||
// expectations name "sve2". | ||
|
||
movprfx z0, z7 | ||
// CHECK-INST: movprfx z0, z7 | ||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] | ||
// CHECK-ERROR: instruction requires: sve | ||
// CHECK-UNKNOWN: e0 bc 20 04 <unknown> | ||
|
||
cdot z0.d, z1.h, z31.h, #0 | ||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0 | ||
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 10 df 44 <unknown> | ||
|
||
movprfx z0, z7 | ||
// CHECK-INST: movprfx z0, z7 | ||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] | ||
// CHECK-ERROR: instruction requires: sve | ||
// CHECK-UNKNOWN: e0 bc 20 04 <unknown> | ||
|
||
cdot z0.d, z1.h, z15.h[1], #0 | ||
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0 | ||
// CHECK-ENCODING: [0x20,0x40,0xff,0x44] | ||
// CHECK-ERROR: instruction requires: sve2 | ||
// CHECK-UNKNOWN: 20 40 ff 44 <unknown> |