diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2e72a89c..814a85b4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,5 +1,8 @@ name: test -on: [push] +on: + push: + branches: + - '*' defaults: run: diff --git a/CMakeLists.txt b/CMakeLists.txt index 28b35cbe..79b0f517 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5) -project(xbyak LANGUAGES CXX VERSION 7.09) +project(xbyak LANGUAGES CXX VERSION 7.09.1) file(GLOB headers xbyak/*.h) diff --git a/doc/changelog.md b/doc/changelog.md index 0829929e..10f6a9d9 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,6 +1,6 @@ # History -* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some nmemonics with {sae}/{er}. +* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}. * 2024/Oct/07 ver 7.08 support rdfsbase etc. * 2024/Aug/29 ver 7.07.1 adapt to NASM 2.16.03 output of xchg (The functionality stays the same.) * 2024/Jun/11 ver 7.07 support xresldtrk/xsusldtrk diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 2b294ee5..79ec79aa 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -251,8 +251,8 @@ void putXM_X() { 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, { 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, - { 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 }, - { 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 }, + { 0x63, "vpcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 }, + { 0x63, "vpcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/meson.build b/meson.build index 663c68be..0fea416a 100644 --- a/meson.build +++ b/meson.build @@ -5,7 +5,7 @@ project( 'xbyak', 'cpp', - version: '7.09', + version: '7.09.1', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' ) diff --git a/readme.md b/readme.md index 12f5bc75..3ee7dd1d 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ -# Xbyak 7.09 [![Badge Build]][Build Status] +# Xbyak 7.09.1 [![Badge Build]][Build Status] *A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)* diff --git a/readme.txt b/readme.txt index ae0f9737..a82c4082 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.09 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.09.1 ----------------------------------------------------------------------------- ◎概要 diff --git a/test/Makefile b/test/Makefile index 862c110a..ca2f0bb0 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,6 +60,10 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 +TEST_FILES=avx10.txt misc.txt +xed_test: + @for target in $(addprefix target/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done + test_nm: normalize_prefix $(TARGET) $(MAKE) -C ../gen ifneq ($(ONLY_64BIT),1) @@ -118,7 +122,7 @@ test: detect_x32 $(MAKE) test_avx512 clean: - $(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test + $(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test detect_x32 lib_run: lib_test.cpp lib_run.cpp lib.h $(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run diff --git a/test/misc.cpp b/test/misc.cpp index 18760d4e..bc5083b3 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -285,24 +285,24 @@ CYBOZU_TEST_AUTO(vpclmulqdq) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } -CYBOZU_TEST_AUTO(vcompressb_w) +CYBOZU_TEST_AUTO(vpcompressb_w) { struct Code : Xbyak::CodeGenerator { Code() { - vcompressb(ptr[rax + 64], xmm1); - vcompressb(xmm30 | k5, xmm1); - vcompressb(ptr[rax + 64], ymm1); - vcompressb(ymm30 | k3 |T_z, ymm1); - vcompressb(ptr[rax + 64], zmm1); - vcompressb(zmm30 | k2 |T_z, zmm1); - - vcompressw(ptr[rax + 64], xmm1); - vcompressw(xmm30 | k5, xmm1); - vcompressw(ptr[rax + 64], ymm1); - vcompressw(ymm30 | k3 |T_z, ymm1); - vcompressw(ptr[rax + 64], zmm1); - vcompressw(zmm30 | k2 |T_z, zmm1); + vpcompressb(ptr[rax + 64], xmm1); + vpcompressb(xmm30 | k5, xmm1); + vpcompressb(ptr[rax + 64], ymm1); + vpcompressb(ymm30 | k3 |T_z, ymm1); + vpcompressb(ptr[rax + 64], zmm1); + vpcompressb(zmm30 | k2 |T_z, zmm1); + + vpcompressw(ptr[rax + 64], xmm1); + vpcompressw(xmm30 | k5, xmm1); + vpcompressw(ptr[rax + 64], ymm1); + vpcompressw(ymm30 | k3 |T_z, ymm1); + vpcompressw(ptr[rax + 64], zmm1); + vpcompressw(zmm30 | k2 |T_z, zmm1); } } c; const uint8_t tbl[] = { diff --git a/test/target/avx10.txt b/test/target/avx10.txt new file mode 100644 index 00000000..8ee52caa --- /dev/null +++ b/test/target/avx10.txt @@ -0,0 +1,149 @@ +vaddpd(ymm1, ymm2, ymm3 |T_rn_sae); +vaddph(ymm1, ymm2, ymm3 |T_rn_sae); +vaddps(ymm1, ymm2, ymm3 |T_rn_sae); +vcmppd(k1, ymm2, ymm3 |T_sae, 3); +vcmpph(k1, ymm2, ymm3 |T_sae, 3); +vcmpps(k1, ymm2, ymm3 |T_sae, 3); +vcvtdq2ph(xmm1, ymm2 |T_rn_sae); +vcvtdq2ps(ymm1, ymm2 |T_rn_sae); +vcvtpd2dq(xmm1, ymm2 |T_rn_sae); +vcvtpd2ph(xmm1, ymm2 |T_rn_sae); +vcvtpd2ps(xmm1, ymm2 |T_rn_sae); +vcvtpd2qq(ymm1, ymm2 |T_rn_sae); +vcvtpd2udq(xmm1, ymm2 |T_rn_sae); +vcvtpd2uqq(ymm1, ymm2 |T_rn_sae); +vcvtph2dq(ymm1, xmm2 |T_rn_sae); +vcvtph2pd(ymm1, xmm2 |T_sae); +vcvtph2ps(ymm1, xmm2 |T_sae); +vcvtph2psx(ymm1, xmm2 |T_sae); +vcvtph2qq(ymm1, xmm2 |T_rn_sae); +vcvtph2udq(ymm1, xmm2 |T_rn_sae); +vcvtph2uqq(ymm1, xmm2 |T_rn_sae); +vcvtph2uw(ymm1, ymm2 |T_rn_sae); +vcvtph2w(ymm1, ymm2 |T_rn_sae); +vcvtps2dq(ymm1, ymm2 |T_rn_sae); +vcvtps2pd(ymm1, xmm2 |T_sae); +vcvtps2ph(xmm1, ymm2 |T_sae, 3); +vcvtps2phx(xmm1, ymm2 |T_rn_sae); +vcvtps2qq(ymm1, xmm2 |T_rn_sae); +vcvtps2udq(ymm1, ymm2 |T_rn_sae); +vcvtps2uqq(ymm1, xmm2 |T_rn_sae); +vcvtqq2pd(ymm1, ymm2 |T_rn_sae); +vcvtqq2ph(xmm1, ymm2 |T_rn_sae); +vcvtqq2ps(xmm1, ymm2 |T_rn_sae); +vcvttpd2dq(xmm1, ymm2 |T_sae); +vcvttpd2qq(ymm1, ymm2 |T_sae); +vcvttpd2udq(xmm1, ymm2 |T_sae); +vcvttpd2uqq(ymm1, ymm2 |T_sae); +vcvttph2dq(ymm1, xmm2 |T_sae); +vcvttph2qq(ymm1, xmm2 |T_sae); +vcvttph2udq(ymm1, xmm2 |T_sae); +vcvttph2uqq(ymm1, xmm2 |T_sae); +vcvttph2uw(ymm1, ymm2 |T_sae); +vcvttph2w(ymm1, ymm2 |T_sae); +vcvttps2dq(ymm1, ymm2 |T_sae); +vcvttps2qq(ymm1, xmm2 |T_sae); +vcvttps2udq(ymm1, ymm2 |T_sae); +vcvttps2uqq(ymm1, xmm2 |T_sae); +vcvtudq2ph(xmm1, ymm2 |T_rn_sae); +vcvtudq2ps(ymm1, ymm2 |T_rn_sae); +vcvtuqq2pd(ymm1, ymm2 |T_rn_sae); +vcvtuqq2ph(xmm1, ymm2 |T_rn_sae); +vcvtuqq2ps(xmm1, ymm2 |T_rn_sae); +vcvtuw2ph(ymm1, ymm2 |T_rn_sae); +vcvtw2ph(ymm1, ymm2 |T_rn_sae); +vdivpd(ymm1, ymm2, ymm3 |T_rn_sae); +vdivph(ymm1, ymm2, ymm3 |T_rn_sae); +vdivps(ymm1, ymm2, ymm3 |T_rn_sae); +vfcmaddcph(ymm1, ymm2, ymm3 |T_rn_sae); +vfcmulcph(ymm1, ymm2, ymm3 |T_rn_sae); +vfixupimmpd(ymm1, ymm2, ymm3 |T_sae, 3); +vfixupimmps(ymm1, ymm2, ymm3 |T_sae, 3); +vfmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddcph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmaddsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfmsubadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfmulcph(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); +vfnmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); +vgetexppd(ymm1, ymm2 |T_sae); +vgetexpph(ymm1, ymm2 |T_sae); +vgetexpps(ymm1, ymm2 |T_sae); +vgetmantpd(ymm1, ymm2 |T_sae, 3); +vgetmantph(ymm1, ymm2 |T_sae, 3); +vgetmantps(ymm1, ymm2 |T_sae, 3); +vmaxpd(ymm1, ymm2, ymm3 |T_sae); +vmaxph(ymm1, ymm2, ymm3 |T_sae); +vmaxps(ymm1, ymm2, ymm3 |T_sae); +vminpd(ymm1, ymm2, ymm3 |T_sae); +vminph(ymm1, ymm2, ymm3 |T_sae); +vminps(ymm1, ymm2, ymm3 |T_sae); +vmulpd(ymm1, ymm2, ymm3 |T_rn_sae); +vmulph(ymm1, ymm2, ymm3 |T_rn_sae); +vmulps(ymm1, ymm2, ymm3 |T_rn_sae); +vrangepd(ymm1, ymm2, ymm3 |T_sae, 3); +vrangeps(ymm1, ymm2, ymm3 |T_sae, 3); +vreducepd(ymm1, ymm2 |T_sae, 3); +vreduceph(ymm1, ymm2 |T_sae, 3); +vreduceps(ymm1, ymm2 |T_sae, 3); +vrndscalepd(ymm1, ymm2 |T_sae, 3); +vrndscaleph(ymm1, ymm2 |T_sae, 3); +vrndscaleps(ymm1, ymm2 |T_sae, 3); +vscalefpd(ymm1, ymm2, ymm3 |T_rn_sae); +vscalefph(ymm1, ymm2, ymm3 |T_rn_sae); +vscalefps(ymm1, ymm2, ymm3 |T_rn_sae); +vsqrtpd(ymm1, ymm2 |T_rn_sae); +vsqrtph(ymm1, ymm2 |T_rn_sae); +vsqrtps(ymm1, ymm2 |T_rn_sae); +vsubpd(ymm1, ymm2, ymm3 |T_rn_sae); +vsubph(ymm1, ymm2, ymm3 |T_rn_sae); +vsubps(ymm1, ymm2, ymm3 |T_rn_sae); diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp new file mode 100644 index 00000000..08dc8afe --- /dev/null +++ b/test/test_by_xed.cpp @@ -0,0 +1,23 @@ +#include +#include + +struct Code : Xbyak::CodeGenerator { + Code() + { +#include "cpp.txt" + } +}; + +int main() + try +{ + Code c; + FILE *fp = fopen("bin", "wb"); + if (fp) { + fwrite(c.getCode(), 1, c.getSize(), fp); + fclose(fp); + } +} catch (std::exception& e) { + printf("ERR %s\n", e.what()); + return 1; +} diff --git a/test/test_by_xed.py b/test/test_by_xed.py new file mode 100644 index 00000000..f24d7f6b --- /dev/null +++ b/test/test_by_xed.py @@ -0,0 +1,287 @@ +import re +import math +import sys + +class Reg: + def __init__(self, s): + self.name = s + def __str__(self): + return self.name + +g_regTbl = ''' +eax ecx edx ebx esp ebp esi edi +ax cx dx bx sp bp si di +al cl dl bl ah ch dh bh +k1 k2 k3 k4 k5 k6 k7 +rax rcx rdx rbx rsp rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 +r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 +r8d r9d r10d r11d r12d r13d r14d r15d +r16d r17d r18d r19d r20d r21d r22d r23d r24d r25d r26d r27d r28d r29d r30d r31d +r8w r9w r10w r11w r12w r13w r14w r15w +r16w r17w r18w r19w r20w r21w r22w r23w r24w r25w r26w r27w r28w r29w r30w r31w +r8b r9b r10b r11b r12b r13b r14b r15b +r16b r17b r18b r19b r20b r21b r22b r23b r24b r25b r26b r27b r28b r29b r30b r31b +spl bpl sil dil +xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 +xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15 +xmm16 xmm17 xmm18 xmm19 xmm20 xmm21 xmm22 xmm23 +xmm24 xmm25 xmm26 xmm27 xmm28 xmm29 xmm30 xmm31 +ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7 +ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14 ymm15 +ymm16 ymm17 ymm18 ymm19 ymm20 ymm21 ymm22 ymm23 +ymm24 ymm25 ymm26 ymm27 ymm28 ymm29 ymm30 ymm31 +zmm0 zmm1 zmm2 zmm3 zmm4 zmm5 zmm6 zmm7 +zmm8 zmm9 zmm10 zmm11 zmm12 zmm13 zmm14 zmm15 +zmm16 zmm17 zmm18 zmm19 zmm20 zmm21 zmm22 zmm23 +zmm24 zmm25 zmm26 zmm27 zmm28 zmm29 zmm30 zmm31 +'''.split() + +# define global constants +for e in g_regTbl: + globals()[e] = Reg(e) + +g_replaceCharTbl = '{}();|,' +g_replaceChar = str.maketrans(g_replaceCharTbl, ' '*len(g_replaceCharTbl)) +g_sizeTbl = ['byte', 'word', 'dword', 'qword', 'xword', 'yword', 'zword'] +g_attrTbl = ['T_sae', 'T_rn_sae', 'T_rd_sae', 'T_ru_sae', 'T_rz_sae'] #, 'T_z'] +g_attrXedTbl = ['sae', 'rne-sae', 'rd-sae', 'ru-sae', 'rz-sae'] + +class Attr: + def __init__(self, s): + self.name = s + def __str__(self): + return self.name + +for e in g_attrTbl: + globals()[e] = Attr(e) + +class Memory: + def __init__(self, size=0, base=None, index=None, scale=0, disp=0): + self.size = size + self.base = base + self.index = index + self.scale = scale + self.disp = disp + + def __str__(self): + s = 'ptr' if self.size == 0 else g_sizeTbl[int(math.log2(self.size))] + s += ' [' + needPlus = False + if self.base: + s += str(self.base) + needPlus = True + if self.index: + if needPlus: + s += '+' + s += str(self.index) + if self.scale > 1: + s += f'*{self.scale}' + needPlus = True + if self.disp: + if needPlus: + s += '+' + s += hex(self.disp) + s += ']' + return s + + + def __eq__(self, rhs): + return str(self) == str(rhs) + +def parseMemory(s): + sizeTbl = { + 'byte': 1, 'word': 2, 'dword': 4, 'qword': 8, + 'xword': 16, 'yword': 32, 'zword': 64 + } + + s = s.replace(' ', '').lower() + + # Parse size + size = 0 + for i in range(len(g_sizeTbl)): + w = g_sizeTbl[i] + if s.startswith(w): + size = 1< 0 and elems[i-1][2] == '-' else 1 + b = 16 if e[0].startswith('0x') else 10 + disp += sign * int(e[0], b) + + return Memory(size, base, index, scale, disp) + +class Nmemonic: + def __init__(self, name, args=[], attrs=[]): + self.name = name + self.args = args + self.attrs = attrs + def __str__(self): + s = f'{self.name}(' + for i in range(len(self.args)): + if i > 0: + s += ', ' + s += str(self.args[i]) + for e in self.attrs: + s += f'|{e}' + s += ');' + return s + +def parseNmemonic(s): + s = s.translate(g_replaceChar) + + # reconstruct memory string + v = [] + inMemory = False + for e in s.split(): + if inMemory: + v[-1] += e + if ']' in e: + inMemory = False + else: + v.append(e) + if e in g_sizeTbl or e == 'ptr': + v[-1] += ' ' # to avoid 'byteptr' + inMemory = True + + name = v[0] + args = [] + attrs = [] + for e in v[1:]: + if e.startswith('0x'): + args.append(int(e, 16)) + elif e[0] in '0123456789': + args.append(int(e)) + elif e in g_attrTbl: + attrs.append(Attr(e)) + elif e in g_attrXedTbl: + attrs.append(Attr(g_attrTbl[g_attrXedTbl.index(e)])) + elif e in g_regTbl: + args.append(e) + else: + args.append(parseMemory(e)) + return Nmemonic(name, args, attrs) + +def loadFile(name): + with open(name) as f: + r = [] + for line in f.read().split('\n'): + if line: + if line[0] == '#': + continue + r.append(line) + return r + +# remove top 5 information +# e.g. XDIS 0: AVX512 AVX512EVEX 62F1E91858CB vaddpd ymm1{rne-sae}, ymm2, ymm3 +def removeExtraInfo(s): + v = s.split() + return ' '.join(v[5:]) + +def run(cppText, xedText): + cpp = loadFile(cppText) + xed = loadFile(xedText) + for i in range(len(cpp)): + line1 = cpp[i] + line2 = removeExtraInfo(xed[i]) + m1 = parseNmemonic(line1) + m2 = parseNmemonic(line2) + + assertEqualStr(m1, m2, f'{i}') + print('run ok') + +def assertEqualStr(a, b, msg=None): + if str(a) != str(b): + raise Exception(f'assert fail {msg}:', str(a), str(b)) + +def MemoryTest(): + tbl = [ + (Memory(0, rax), 'ptr [rax]'), + (Memory(4, rax), 'dword [rax]'), + (Memory(8, rax, rcx), 'qword [rax+rcx]'), + (Memory(8, rax, rcx, 4), 'qword [rax+rcx*4]'), + (Memory(8, None, rcx, 4), 'qword [rcx*4]'), + (Memory(8, rax, None, 0, 5), 'qword [rax+0x5]'), + (Memory(8, None, None, 0, 255), 'qword [0xff]'), + ] + for (m, expected) in tbl: + assertEqualStr(m, expected) + +def parseMemoryTest(): + print('parseMemoryTest') + tbl = [ + ('[]', Memory()), + ('[rax]', Memory(0, rax)), + ('ptr[rax]', Memory(0, rax)), + ('dword[rbx]', Memory(4, rbx)), + ('xword ptr[rcx]', Memory(16, rcx)), + ('xword ptr[rdx*8]', Memory(16, None, rdx, 8)), + ('[12345]', Memory(0, None, None, 0, 12345)), + ('[0x12345]', Memory(0, None, None, 0, 0x12345)), + ('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)), + ('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)), + ] + for (s, expected) in tbl: + my = parseMemory(s) + assertEqualStr(my, expected) + +def parseNmemonicTest(): + print('parseNmemonicTest') + tbl = [ + ('vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])), + ('vaddpd ymm1{rne-sae}, ymm2, ymm3', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])), + ('mov(rax, dword ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(4, rcx, rdx, 8)])), + ('mov(rax, ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(0, rcx, rdx, 8)])), + ('vcmppd(k1, ymm2, ymm3 |T_sae, 3);', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])), + ('vcmppd k1{sae}, ymm2, ymm3, 0x3', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])), + ] + for (s, expected) in tbl: + e = parseNmemonic(s) + assertEqualStr(e, expected) + +def test(): + print('test start') + MemoryTest() + parseMemoryTest() + parseNmemonicTest() + print('test end') + +def main(): + if len(sys.argv) == 2 and sys.argv[1] == 'test': + test() + elif len(sys.argv) == 3: + run(sys.argv[1], sys.argv[2]) + else: + print(f'{__name__} # compare cpp-text and xed-text generated by xed') + print(f'{__name__} test # for test') + +if __name__ == '__main__': + main() diff --git a/test/test_by_xed.sh b/test/test_by_xed.sh new file mode 100755 index 00000000..6d820bd7 --- /dev/null +++ b/test/test_by_xed.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -e +XED=${XED:=xed} +CXX=${CXX:=g++} +PYTHON=${PYTHON:=python3} + +if [ $# -ne 1 ]; then + echo "./test_by_xed.sh " + exit 1 +fi + +TARGET=$1 + +CFLAGS="-Wall -Wextra -I ../" + +echo "test:" $TARGET +cp $TARGET cpp.txt +$CXX $CFLAGS test_by_xed.cpp -o test_by_xed +./test_by_xed +$XED -64 -ir bin > out.txt +$PYTHON test_by_xed.py cpp.txt out.txt + diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index c9b6269e..f0d99db5 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -155,7 +155,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x7090 /* 0xABCD = A.BC(.D) */ + VERSION = 0x7091 /* 0xABCD = A.BC(.D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index a1b61db4..8316bd92 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "7.09"; } +const char *getVersionString() const { return "7.09.1"; } void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); } void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); } void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); } @@ -2198,10 +2198,8 @@ void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); } void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); } void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); } -void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x63); } void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); } void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); } -void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); } void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x5B); } void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); } void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5A); } @@ -2409,8 +2407,10 @@ void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { op void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x1E, imm); } void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3E, imm); } void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3F, imm); } +void vpcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x63); } void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8B); } void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8B); } +void vpcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); } void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xC4); } void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xC4); } void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }