From 3b92c2c7e9b011bb5aebea19057e4d878684e998 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 21 May 2018 09:28:57 +0100 Subject: [PATCH 1/3] i#2626 AArch64 encoder: Add isz operand and vector ADD to encoder. This patch adds an isz operand to encode the vector element width for non-FP vector instructions. It also adds support for vector ADD to the encoder/decoder. Additional tests and macros should be added once the script in the project-aarch64-generate-patterns branch gets updated. Issue #2626 Change-Id: I2bca21610205c3b2ba7bb67f990fe108d210001c --- core/arch/aarch64/codec.c | 19 +++++++++++++++++++ core/arch/aarch64/codec.txt | 6 ++++++ suite/tests/api/dis-a64.txt | 10 ++++++++++ 3 files changed, 35 insertions(+) diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c index 88a2248d593..7badef73494 100644 --- a/core/arch/aarch64/codec.c +++ b/core/arch/aarch64/codec.c @@ -2744,6 +2744,25 @@ encode_opnd_fsz16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out return false; } +/* isz: Vector element width for SIMD instructions. */ +static inline bool +decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + uint bits = enc >> 22 & 3; + *opnd = opnd_create_immed_int(bits, OPSZ_2b); + return true; +} + +static inline bool +encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) +{ + if (opnd_get_immed_int(opnd) < 0 || opnd_get_immed_int(opnd) > 3) { + return false; + } + *enc_out = opnd_get_immed_int(opnd) << 22; + return true; +} + /******************************************************************************/ /* Include automatically generated decoder and encoder. */ diff --git a/core/arch/aarch64/codec.txt b/core/arch/aarch64/codec.txt index c189bc51dcf..ef4e8c83751 100644 --- a/core/arch/aarch64/codec.txt +++ b/core/arch/aarch64/codec.txt @@ -151,6 +151,8 @@ x----------------xxxxx---------- wx10 # W/X register (or WZR/XZR) x----------xxxxx---------------- wx16 # W/X register (or WZR/XZR) ---------xx--------------------- fsz # element size of FP vector reg (single (0x1) and double (0x3) encoding) ---------xx--------------------- fsz16 # element size of FP vector reg (half (0x2) encoding) +--------xx---------------------- isz # element size of a vector register + # (byte (0), half (1), single (2), double (3)) # Scalar floating point operands # H, S or D register including type (bits 22 and 23) for FP instruction. Half precision @@ -951,6 +953,10 @@ x101101011000000000101xxxxxxxxxx cls wx0 : wx5 1101101011000000000011xxxxxxxxxx rev x0 : x5 # Data Processing - Scalar Floating-Point and Advanced SIMD + +# ADD +0x001110xx1xxxxx100001xxxxxxxxxx add dq0 : dq5 dq16 isz + # FMOV (general) GPR to FP reg 0001111011100111000000xxxxxxxxxx fmov h0 : w5 # Armv8.2 0001111000100111000000xxxxxxxxxx fmov s0 : w5 diff --git a/suite/tests/api/dis-a64.txt b/suite/tests/api/dis-a64.txt index cbe6988abd2..59a2f14111f 100644 --- a/suite/tests/api/dis-a64.txt +++ b/suite/tests/api/dis-a64.txt @@ -1561,6 +1561,16 @@ fd3fffff : str d31, [sp,#32760] : str %d31 -> +0x7ff8(%sp)[8byte] fd481041 : ldr d1, [x2,#4128] : ldr +0x1020(%x2)[8byte] -> %d1 fd7fffff : ldr d31, [sp,#32760] : ldr +0x7ff8(%sp)[8byte] -> %d31 + +# ADD (vector) +4e2c856a : add v10.16b, v11.16b, v12.16b : add %q11 %q12 $0x00 -> %q10 +0e2584a5 : add v5.8b, v5.8b, v5.8b : add %d5 %d5 $0x00 -> %d5 +4e7f87c3 : add v3.8h, v30.8h, v31.8h : add %q30 %q31 $0x01 -> %q3 +0e7f87c3 : add v3.4h, v30.4h, v31.4h : add %d30 %d31 $0x01 -> %d3 +4ebd8633 : add v19.4s, v17.4s, v29.4s : add %q17 %q29 $0x02 -> %q19 +0ebd8633 : add v19.2s, v17.2s, v29.2s : add %d17 %d29 $0x02 -> %d19 +4ee9852d : add v13.2d, v9.2d, v9.2d : add %q9 %q9 $0x03 -> %q13 + # FMOV (general) GPR to FP reg 1ee70220 : fmov h0, w17 : fmov %w17 -> %h0 1e27012a : fmov s10, w9 : fmov %w9 -> %s10 From eb63f9cd782ab13cd7ad36ab3ac22014f3eae2cb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 23 May 2018 14:27:55 +0100 Subject: [PATCH 2/3] Some style changes Change-Id: I7414163022cb784fdd4d7af29cb6c184e7394c45 --- core/arch/aarch64/codec.c | 38 ++++++++++++++++---------------- core/arch/aarch64/codec.txt | 4 ++-- core/arch/aarch64/instr_create.h | 24 ++++++++++++++++++++ 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c index 274b718cfee..fc9edf79630 100644 --- a/core/arch/aarch64/codec.c +++ b/core/arch/aarch64/codec.c @@ -1966,6 +1966,25 @@ encode_opnd_s10(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) return encode_opnd_vector_reg(10, 2, opnd, enc_out); } +/* isz: Vector element width for SIMD instructions. */ + +static inline bool +decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + uint bits = enc >> 22 & 3; + *opnd = opnd_create_immed_int(bits, OPSZ_2b); + return true; +} + +static inline bool +encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) +{ + if (opnd_get_immed_int(opnd) < ISZ_BYTE || opnd_get_immed_int(opnd) > ISZ_DOUBLE) + return false; + *enc_out = opnd_get_immed_int(opnd) << 22; + return true; +} + /* shift3: shift type for ADD/SUB: LSL, LSR or ASR */ static inline bool @@ -2826,25 +2845,6 @@ encode_opnd_fsz16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out return false; } -/* isz: Vector element width for SIMD instructions. */ -static inline bool -decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) -{ - uint bits = enc >> 22 & 3; - *opnd = opnd_create_immed_int(bits, OPSZ_2b); - return true; -} - -static inline bool -encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) -{ - if (opnd_get_immed_int(opnd) < 0 || opnd_get_immed_int(opnd) > 3) { - return false; - } - *enc_out = opnd_get_immed_int(opnd) << 22; - return true; -} - /******************************************************************************/ /* Include automatically generated decoder and encoder. */ diff --git a/core/arch/aarch64/codec.txt b/core/arch/aarch64/codec.txt index 750fd73377b..30bad7d0be2 100644 --- a/core/arch/aarch64/codec.txt +++ b/core/arch/aarch64/codec.txt @@ -131,8 +131,8 @@ ---------?x---------x----------- vindex_SD # Index for vector with single or double # elements, depending on bit 22 (sz) ?--------xx--------------------- imm16sh # shift for MOVK/... (immediate); checks 31 ---------xx---------------------- isz # element size of a vector register - # (byte (0), half (1), single (2), double (3)) +--------xx---------------------- isz # element size of a vector register ( + # byte (0), half (1), single (2), double (3)) --------xx---------------------- shift3 # shift type for add/sub (shifted register) --------xx---------------------- shift4 # shift type for logical (shifted register) ??---?--xxxxxxxxxxxxxxxxxxx----- memlit # load literal, gets size from 31:30 and 26 diff --git a/core/arch/aarch64/instr_create.h b/core/arch/aarch64/instr_create.h index 9ce53310012..4192ce81228 100644 --- a/core/arch/aarch64/instr_create.h +++ b/core/arch/aarch64/instr_create.h @@ -41,6 +41,30 @@ /* DR_API EXPORT TOFILE dr_ir_macros_aarch64.h */ /* DR_API EXPORT BEGIN */ +/** + * Used in an additional immediate source operand to a vector operation, denotes + * 8 bit vector element width. See \ref sec_IR_AArch64. + */ +#define ISZ_BYTE 0 + +/** + * Used in an additional immediate source operand to a vector operation, denotes + * 16 bit vector element width. See \ref sec_IR_AArch64. + */ +#define ISZ_HALF 1 + +/** + * Used in an additional immediate source operand to a vector operation, denotes + * 32 bit vector element width. See \ref sec_IR_AArch64. + */ +#define ISZ_SINGLE 2 + +/** + * Used in an additional immediate source operand to a vector operation, denotes + * 64 bit vector element width. See \ref sec_IR_AArch64. + */ +#define ISZ_DOUBLE 3 + /** * Used in an additional immediate source operand to a vector operation, denotes * half-precision floating point vector elements. See \ref sec_IR_AArch64. From 636028894a0c311eda2d09bc629254b0c61c22c9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 24 May 2018 14:26:00 +0100 Subject: [PATCH 3/3] Address Edmund's comments. Change-Id: I4c017e16910f18a3b56bb1fc59df743aea908e40 --- core/arch/aarch64/codec.c | 5 +++-- core/arch/aarch64/codec.txt | 3 +-- suite/tests/api/dis-a64.txt | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c index fc9edf79630..84c9a27ce3b 100644 --- a/core/arch/aarch64/codec.c +++ b/core/arch/aarch64/codec.c @@ -1979,9 +1979,10 @@ decode_opnd_isz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) static inline bool encode_opnd_isz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) { - if (opnd_get_immed_int(opnd) < ISZ_BYTE || opnd_get_immed_int(opnd) > ISZ_DOUBLE) + ptr_int_t val = opnd_get_immed_int(opnd); + if ( val < 0 || val > 3) return false; - *enc_out = opnd_get_immed_int(opnd) << 22; + *enc_out = val << 22; return true; } diff --git a/core/arch/aarch64/codec.txt b/core/arch/aarch64/codec.txt index 30bad7d0be2..af184d2bbda 100644 --- a/core/arch/aarch64/codec.txt +++ b/core/arch/aarch64/codec.txt @@ -131,8 +131,7 @@ ---------?x---------x----------- vindex_SD # Index for vector with single or double # elements, depending on bit 22 (sz) ?--------xx--------------------- imm16sh # shift for MOVK/... (immediate); checks 31 ---------xx---------------------- isz # element size of a vector register ( - # byte (0), half (1), single (2), double (3)) +--------xx---------------------- isz # element size of a vector register (8< %d31 # ADD (vector) 4e2c856a : add v10.16b, v11.16b, v12.16b : add %q11 %q12 $0x00 -> %q10 -0e2584a5 : add v5.8b, v5.8b, v5.8b : add %d5 %d5 $0x00 -> %d5 -4e7f87c3 : add v3.8h, v30.8h, v31.8h : add %q30 %q31 $0x01 -> %q3 -0e7f87c3 : add v3.4h, v30.4h, v31.4h : add %d30 %d31 $0x01 -> %d3 -4ebd8633 : add v19.4s, v17.4s, v29.4s : add %q17 %q29 $0x02 -> %q19 -0ebd8633 : add v19.2s, v17.2s, v29.2s : add %d17 %d29 $0x02 -> %d19 -4ee9852d : add v13.2d, v9.2d, v9.2d : add %q9 %q9 $0x03 -> %q13 +0e2584a5 : add v5.8b, v5.8b, v5.8b : add %d5 %d5 $0x00 -> %d5 +4e7f87c3 : add v3.8h, v30.8h, v31.8h : add %q30 %q31 $0x01 -> %q3 +0e7f87c3 : add v3.4h, v30.4h, v31.4h : add %d30 %d31 $0x01 -> %d3 +4ebd8633 : add v19.4s, v17.4s, v29.4s : add %q17 %q29 $0x02 -> %q19 +0ebd8633 : add v19.2s, v17.2s, v29.2s : add %d17 %d29 $0x02 -> %d19 +4ee9852d : add v13.2d, v9.2d, v9.2d : add %q9 %q9 $0x03 -> %q13 # FMOV (general) GPR to FP reg 1ee70220 : fmov h0, w17 : fmov %w17 -> %h0