From 3b92c2c7e9b011bb5aebea19057e4d878684e998 Mon Sep 17 00:00:00 2001
From: Florian Hahn <florian.hahn@arm.com>
Date: Mon, 21 May 2018 09:28:57 +0100
Subject: [PATCH 1/3] i#2626 AArch64 encoder: Add isz operand and vector ADD to
 encoder.

This patch adds an isz operand to encode the vector element width for
non-FP vector instructions. It also adds support for vector ADD to the
encoder/decoder. Additional tests and macros should be added once the
script in the project-aarch64-generate-patterns branch gets updated.

Issue #2626

Change-Id: I2bca21610205c3b2ba7bb67f990fe108d210001c
---
 core/arch/aarch64/codec.c   | 19 +++++++++++++++++++
 core/arch/aarch64/codec.txt |  6 ++++++
 suite/tests/api/dis-a64.txt | 10 ++++++++++
 3 files changed, 35 insertions(+)

diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c
index 88a2248d593..7badef73494 100644
--- a/core/arch/aarch64/codec.c
+++ b/core/arch/aarch64/codec.c
@@ -2744,6 +2744,25 @@ encode_opnd_fsz16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out
     return false;
 }
 
+/* isz: Vector element width for SIMD instructions. */
+static inline bool
+decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    uint bits = enc >> 22 & 3;
+    *opnd = opnd_create_immed_int(bits, OPSZ_2b);
+    return true;
+}
+
+static inline bool
+encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    if (opnd_get_immed_int(opnd) < 0 || opnd_get_immed_int(opnd) > 3) {
+        return false;
+    }
+    *enc_out = opnd_get_immed_int(opnd) << 22;
+    return true;
+}
+
 /******************************************************************************/
 
 /* Include automatically generated decoder and encoder. */
diff --git a/core/arch/aarch64/codec.txt b/core/arch/aarch64/codec.txt
index c189bc51dcf..ef4e8c83751 100644
--- a/core/arch/aarch64/codec.txt
+++ b/core/arch/aarch64/codec.txt
@@ -151,6 +151,8 @@ x----------------xxxxx----------  wx10       # W/X register (or WZR/XZR)
 x----------xxxxx----------------  wx16       # W/X register (or WZR/XZR)
 ---------xx---------------------  fsz        # element size of FP vector reg (single (0x1) and double (0x3) encoding)
 ---------xx---------------------  fsz16      # element size of FP vector reg (half (0x2) encoding)
+--------xx----------------------  isz        # element size of a vector register
+                                             # (byte (0), half (1), single (2), double (3))
 
 # Scalar floating point operands
 # H, S or D register including type (bits 22 and 23) for FP instruction. Half precision
@@ -951,6 +953,10 @@ x101101011000000000101xxxxxxxxxx  cls     wx0 : wx5
 1101101011000000000011xxxxxxxxxx  rev     x0 : x5
 
 # Data Processing - Scalar Floating-Point and Advanced SIMD
+
+# ADD
+0x001110xx1xxxxx100001xxxxxxxxxx     add dq0 : dq5 dq16 isz
+
 # FMOV (general) GPR to FP reg
 0001111011100111000000xxxxxxxxxx     fmov h0 : w5 # Armv8.2
 0001111000100111000000xxxxxxxxxx     fmov s0 : w5
diff --git a/suite/tests/api/dis-a64.txt b/suite/tests/api/dis-a64.txt
index cbe6988abd2..59a2f14111f 100644
--- a/suite/tests/api/dis-a64.txt
+++ b/suite/tests/api/dis-a64.txt
@@ -1561,6 +1561,16 @@ fd3fffff : str    d31, [sp,#32760]        : str    %d31 -> +0x7ff8(%sp)[8byte]
 fd481041 : ldr    d1, [x2,#4128]          : ldr    +0x1020(%x2)[8byte] -> %d1
 fd7fffff : ldr    d31, [sp,#32760]        : ldr    +0x7ff8(%sp)[8byte] -> %d31
 
+
+# ADD (vector)
+4e2c856a : add v10.16b, v11.16b, v12.16b : add    %q11 %q12 $0x00 -> %q10
+0e2584a5 : add v5.8b, v5.8b, v5.8b : add    %d5 %d5 $0x00 -> %d5
+4e7f87c3 : add v3.8h, v30.8h, v31.8h : add    %q30 %q31 $0x01 -> %q3
+0e7f87c3 : add v3.4h, v30.4h, v31.4h : add    %d30 %d31 $0x01 -> %d3
+4ebd8633 : add v19.4s, v17.4s, v29.4s : add    %q17 %q29 $0x02 -> %q19
+0ebd8633 : add v19.2s, v17.2s, v29.2s : add    %d17 %d29 $0x02 -> %d19
+4ee9852d : add v13.2d, v9.2d, v9.2d : add    %q9 %q9 $0x03 -> %q13
+
 # FMOV (general) GPR to FP reg
 1ee70220 : fmov  h0, w17 : fmov   %w17 -> %h0
 1e27012a : fmov  s10, w9 : fmov   %w9 -> %s10

From eb63f9cd782ab13cd7ad36ab3ac22014f3eae2cb Mon Sep 17 00:00:00 2001
From: Florian Hahn <florian.hahn@arm.com>
Date: Wed, 23 May 2018 14:27:55 +0100
Subject: [PATCH 2/3] i#2626 AArch64 encoder: Move isz helpers, add ISZ_* macros

Change-Id: I7414163022cb784fdd4d7af29cb6c184e7394c45
---
 core/arch/aarch64/codec.c        | 38 ++++++++++++++++----------------
 core/arch/aarch64/codec.txt      |  4 ++--
 core/arch/aarch64/instr_create.h | 24 ++++++++++++++++++++
 3 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c
index 274b718cfee..fc9edf79630 100644
--- a/core/arch/aarch64/codec.c
+++ b/core/arch/aarch64/codec.c
@@ -1966,6 +1966,25 @@ encode_opnd_s10(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
     return encode_opnd_vector_reg(10, 2, opnd, enc_out);
 }
 
+/* isz: Vector element width for SIMD instructions, held in encoding bits 23:22. */
+
+static inline bool
+decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    uint bits = enc >> 22 & 3; /* two-bit size field, i.e. (enc >> 22) & 3 */
+    *opnd = opnd_create_immed_int(bits, OPSZ_2b);
+    return true; /* every value of the two-bit field is accepted */
+}
+
+static inline bool
+encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    if (opnd_get_immed_int(opnd) < ISZ_BYTE || opnd_get_immed_int(opnd) > ISZ_DOUBLE)
+        return false;
+    *enc_out = opnd_get_immed_int(opnd) << 22;
+    return true;
+}
+
 /* shift3: shift type for ADD/SUB: LSL, LSR or ASR */
 
 static inline bool
@@ -2826,25 +2845,6 @@ encode_opnd_fsz16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out
     return false;
 }
 
-/* isz: Vector element width for SIMD instructions. */
-static inline bool
-decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
-{
-    uint bits = enc >> 22 & 3;
-    *opnd = opnd_create_immed_int(bits, OPSZ_2b);
-    return true;
-}
-
-static inline bool
-encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
-{
-    if (opnd_get_immed_int(opnd) < 0 || opnd_get_immed_int(opnd) > 3) {
-        return false;
-    }
-    *enc_out = opnd_get_immed_int(opnd) << 22;
-    return true;
-}
-
 /******************************************************************************/
 
 /* Include automatically generated decoder and encoder. */
diff --git a/core/arch/aarch64/codec.txt b/core/arch/aarch64/codec.txt
index 750fd73377b..30bad7d0be2 100644
--- a/core/arch/aarch64/codec.txt
+++ b/core/arch/aarch64/codec.txt
@@ -131,8 +131,8 @@
 ---------?x---------x-----------  vindex_SD  # Index for vector with single or double
                                              # elements, depending on bit 22 (sz)
 ?--------xx---------------------  imm16sh    # shift for MOVK/... (immediate); checks 31
---------xx----------------------  isz        # element size of a vector register
-                                             # (byte (0), half (1), single (2), double (3))
+--------xx----------------------  isz        # element size of a vector register (
+                                             # byte (0), half (1), single (2), double (3))
 --------xx----------------------  shift3     # shift type for add/sub (shifted register)
 --------xx----------------------  shift4     # shift type for logical (shifted register)
 ??---?--xxxxxxxxxxxxxxxxxxx-----  memlit     # load literal, gets size from 31:30 and 26
diff --git a/core/arch/aarch64/instr_create.h b/core/arch/aarch64/instr_create.h
index 9ce53310012..4192ce81228 100644
--- a/core/arch/aarch64/instr_create.h
+++ b/core/arch/aarch64/instr_create.h
@@ -41,6 +41,30 @@
 /* DR_API EXPORT TOFILE dr_ir_macros_aarch64.h */
 /* DR_API EXPORT BEGIN */
 
+/**
+ * Used in an additional immediate source operand to a vector operation, denotes
+ * 8-bit (byte) vector element width. See \ref sec_IR_AArch64.
+ */
+#define ISZ_BYTE 0
+
+/**
+ * Used in an additional immediate source operand to a vector operation, denotes
+ * 16-bit (half) vector element width. See \ref sec_IR_AArch64.
+ */
+#define ISZ_HALF 1
+
+/**
+ * Used in an additional immediate source operand to a vector operation, denotes
+ * 32-bit (single) vector element width. See \ref sec_IR_AArch64.
+ */
+#define ISZ_SINGLE 2
+
+/**
+ * Used in an additional immediate source operand to a vector operation, denotes
+ * 64-bit (double) vector element width. See \ref sec_IR_AArch64.
+ */
+#define ISZ_DOUBLE 3
+
 /**
  * Used in an additional immediate source operand to a vector operation, denotes
  * half-precision floating point vector elements. See \ref sec_IR_AArch64.

From 636028894a0c311eda2d09bc629254b0c61c22c9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <florian.hahn@arm.com>
Date: Thu, 24 May 2018 14:26:00 +0100
Subject: [PATCH 3/3] Address Edmund's comments: simplify isz encoder, tidy formatting

Change-Id: I4c017e16910f18a3b56bb1fc59df743aea908e40
---
 core/arch/aarch64/codec.c   |  5 +++--
 core/arch/aarch64/codec.txt |  3 +--
 suite/tests/api/dis-a64.txt | 12 ++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c
index fc9edf79630..84c9a27ce3b 100644
--- a/core/arch/aarch64/codec.c
+++ b/core/arch/aarch64/codec.c
@@ -1979,9 +1979,10 @@ decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 static inline bool
 encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    if (opnd_get_immed_int(opnd) < ISZ_BYTE || opnd_get_immed_int(opnd) > ISZ_DOUBLE)
+    ptr_int_t val = opnd_get_immed_int(opnd); /* read the immediate once */
+    if (val < 0 || val > 3) /* isz is a two-bit field: reject anything else */
         return false;
-    *enc_out = opnd_get_immed_int(opnd) << 22;
+    *enc_out = val << 22; /* place the element width in bits 23:22 */
     return true;
 }
 }
 
diff --git a/core/arch/aarch64/codec.txt b/core/arch/aarch64/codec.txt
index 30bad7d0be2..af184d2bbda 100644
--- a/core/arch/aarch64/codec.txt
+++ b/core/arch/aarch64/codec.txt
@@ -131,8 +131,7 @@
 ---------?x---------x-----------  vindex_SD  # Index for vector with single or double
                                              # elements, depending on bit 22 (sz)
 ?--------xx---------------------  imm16sh    # shift for MOVK/... (immediate); checks 31
---------xx----------------------  isz        # element size of a vector register (
-                                             # byte (0), half (1), single (2), double (3))
+--------xx----------------------  isz        # vector element size in bits (8<<isz)
 --------xx----------------------  shift3     # shift type for add/sub (shifted register)
 --------xx----------------------  shift4     # shift type for logical (shifted register)
 ??---?--xxxxxxxxxxxxxxxxxxx-----  memlit     # load literal, gets size from 31:30 and 26
diff --git a/suite/tests/api/dis-a64.txt b/suite/tests/api/dis-a64.txt
index 09afff473b2..5ad82f32b2a 100644
--- a/suite/tests/api/dis-a64.txt
+++ b/suite/tests/api/dis-a64.txt
@@ -1564,12 +1564,12 @@ fd7fffff : ldr    d31, [sp,#32760]        : ldr    +0x7ff8(%sp)[8byte] -> %d31
 
 # ADD (vector)
 4e2c856a : add v10.16b, v11.16b, v12.16b : add    %q11 %q12 $0x00 -> %q10
-0e2584a5 : add v5.8b, v5.8b, v5.8b : add    %d5 %d5 $0x00 -> %d5
-4e7f87c3 : add v3.8h, v30.8h, v31.8h : add    %q30 %q31 $0x01 -> %q3
-0e7f87c3 : add v3.4h, v30.4h, v31.4h : add    %d30 %d31 $0x01 -> %d3
-4ebd8633 : add v19.4s, v17.4s, v29.4s : add    %q17 %q29 $0x02 -> %q19
-0ebd8633 : add v19.2s, v17.2s, v29.2s : add    %d17 %d29 $0x02 -> %d19
-4ee9852d : add v13.2d, v9.2d, v9.2d : add    %q9 %q9 $0x03 -> %q13
+0e2584a5 : add v5.8b, v5.8b, v5.8b       : add    %d5 %d5 $0x00 -> %d5
+4e7f87c3 : add v3.8h, v30.8h, v31.8h     : add    %q30 %q31 $0x01 -> %q3
+0e7f87c3 : add v3.4h, v30.4h, v31.4h     : add    %d30 %d31 $0x01 -> %d3
+4ebd8633 : add v19.4s, v17.4s, v29.4s    : add    %q17 %q29 $0x02 -> %q19
+0ebd8633 : add v19.2s, v17.2s, v29.2s    : add    %d17 %d29 $0x02 -> %d19
+4ee9852d : add v13.2d, v9.2d, v9.2d      : add    %q9 %q9 $0x03 -> %q13
 
 # FMOV (general) GPR to FP reg
 1ee70220 : fmov  h0, w17 : fmov   %w17 -> %h0