From 43e066a18aac1a25ab9cd3ad232e2e6d757159a4 Mon Sep 17 00:00:00 2001 From: scribam Date: Sat, 17 Feb 2024 18:20:45 +0100 Subject: [PATCH] external: add xbyak submodule --- .gitmodules | 3 + CMakeLists.txt | 2 +- core/deps/xbyak/.github/workflows/main.yml | 11 - core/deps/xbyak/.gitignore | 1 - core/deps/xbyak/CMakeLists.txt | 46 - core/deps/xbyak/COPYRIGHT | 47 - core/deps/xbyak/Makefile | 24 - core/deps/xbyak/cmake/config.cmake.in | 1 - core/deps/xbyak/gen/Makefile | 26 - core/deps/xbyak/gen/avx_type.hpp | 170 - core/deps/xbyak/gen/b2hex.cpp | 17 - core/deps/xbyak/gen/gen_avx512.cpp | 750 ----- core/deps/xbyak/gen/gen_code.cpp | 1929 ----------- core/deps/xbyak/gen/sortline.cpp | 23 - core/deps/xbyak/gen/update.bat | 17 - core/deps/xbyak/readme.md | 617 ---- core/deps/xbyak/readme.txt | 560 ---- core/deps/xbyak/sample/Makefile | 123 - core/deps/xbyak/sample/bf.cpp | 211 -- core/deps/xbyak/sample/bf.vcxproj | 228 -- core/deps/xbyak/sample/calc.cpp | 229 -- core/deps/xbyak/sample/calc.vcxproj | 228 -- core/deps/xbyak/sample/calc2.cpp | 302 -- core/deps/xbyak/sample/echo.bf | 5 - core/deps/xbyak/sample/fizzbuzz.bf | 19 - core/deps/xbyak/sample/hello.bf | 3 - core/deps/xbyak/sample/jmp_table.cpp | 128 - core/deps/xbyak/sample/memfunc.cpp | 111 - core/deps/xbyak/sample/profiler.cpp | 90 - core/deps/xbyak/sample/protect-re.cpp | 70 - core/deps/xbyak/sample/quantize.cpp | 226 -- core/deps/xbyak/sample/quantize.vcxproj | 228 -- core/deps/xbyak/sample/stackframe.cpp | 29 - core/deps/xbyak/sample/static_buf.cpp | 45 - core/deps/xbyak/sample/test0.cpp | 190 -- core/deps/xbyak/sample/test0.vcxproj | 228 -- core/deps/xbyak/sample/test_util.cpp | 134 - core/deps/xbyak/sample/test_util.vcxproj | 228 -- core/deps/xbyak/sample/toyvm.cpp | 380 --- core/deps/xbyak/sample/toyvm.vcxproj | 228 -- core/deps/xbyak/test/Makefile | 104 - core/deps/xbyak/test/Makefile.win | 14 - core/deps/xbyak/test/a.bat | 9 - core/deps/xbyak/test/address.cpp | 155 - core/deps/xbyak/test/bad_address.cpp | 28 - core/deps/xbyak/test/cvt_test.cpp | 151 - core/deps/xbyak/test/cybozu/inttype.hpp | 163 - core/deps/xbyak/test/cybozu/test.hpp | 373 --- core/deps/xbyak/test/jmp.cpp | 1385 -------- core/deps/xbyak/test/jmp.sln | 20 - core/deps/xbyak/test/jmp.vcproj | 195 -- core/deps/xbyak/test/lib.h | 63 - core/deps/xbyak/test/lib_min.cpp | 51 - core/deps/xbyak/test/lib_run.cpp | 9 - core/deps/xbyak/test/lib_test.cpp | 13 - core/deps/xbyak/test/make_512.cpp | 2200 ------------- core/deps/xbyak/test/make_nm.cpp | 3468 -------------------- core/deps/xbyak/test/misc.cpp | 846 ----- core/deps/xbyak/test/mprotect_test.cpp | 37 - core/deps/xbyak/test/nm_frame.cpp | 42 - core/deps/xbyak/test/noexception.cpp | 111 - core/deps/xbyak/test/normalize_prefix.cpp | 45 - core/deps/xbyak/test/readme.txt | 6 - core/deps/xbyak/test/rip-label-imm.cpp | 88 - core/deps/xbyak/test/set_opt.bat | 2 - core/deps/xbyak/test/sf_test.cpp | 416 --- core/deps/xbyak/test/state.pptx | Bin 68370 -> 0 bytes core/deps/xbyak/test/test_address.bat | 37 - core/deps/xbyak/test/test_address.sh | 41 - core/deps/xbyak/test/test_all.bat | 8 - core/deps/xbyak/test/test_avx.bat | 42 - core/deps/xbyak/test/test_avx.sh | 48 - core/deps/xbyak/test/test_avx512.bat | 31 - core/deps/xbyak/test/test_avx512.sh | 35 - core/deps/xbyak/test/test_avx_all.bat | 13 - core/deps/xbyak/test/test_jmp.bat | 4 - core/deps/xbyak/test/test_misc.bat | 4 - core/deps/xbyak/test/test_mmx.cpp | 78 - core/deps/xbyak/test/test_nm.bat | 46 - core/deps/xbyak/test/test_nm.sh | 61 - core/deps/xbyak/test/test_nm_all.bat | 11 - core/deps/xbyak/xbyak.sln | 90 - core/deps/xbyak/xbyak/xbyak.h | 2826 ---------------- core/deps/xbyak/xbyak/xbyak_bin2hex.h | 258 -- core/deps/xbyak/xbyak/xbyak_mnemonic.h | 2052 ------------ core/deps/xbyak/xbyak/xbyak_util.h | 926 ------ external/xbyak | 1 + 87 files changed, 5 insertions(+), 24508 deletions(-) delete mode 100644 core/deps/xbyak/.github/workflows/main.yml delete mode 100644 core/deps/xbyak/.gitignore delete mode 100644 core/deps/xbyak/CMakeLists.txt delete mode 100644 core/deps/xbyak/COPYRIGHT delete mode 100644 core/deps/xbyak/Makefile delete mode 100644 core/deps/xbyak/cmake/config.cmake.in delete mode 100644 core/deps/xbyak/gen/Makefile delete mode 100644 core/deps/xbyak/gen/avx_type.hpp delete mode 100644 core/deps/xbyak/gen/b2hex.cpp delete mode 100644 core/deps/xbyak/gen/gen_avx512.cpp delete mode 100644 core/deps/xbyak/gen/gen_code.cpp delete mode 100644 core/deps/xbyak/gen/sortline.cpp delete mode 100644 core/deps/xbyak/gen/update.bat delete mode 100644 core/deps/xbyak/readme.md delete mode 100644 core/deps/xbyak/readme.txt delete mode 100644 core/deps/xbyak/sample/Makefile delete mode 100644 core/deps/xbyak/sample/bf.cpp delete mode 100644 core/deps/xbyak/sample/bf.vcxproj delete mode 100644 core/deps/xbyak/sample/calc.cpp delete mode 100644 core/deps/xbyak/sample/calc.vcxproj delete mode 100644 core/deps/xbyak/sample/calc2.cpp delete mode 100644 core/deps/xbyak/sample/echo.bf delete mode 100644 core/deps/xbyak/sample/fizzbuzz.bf delete mode 100644 core/deps/xbyak/sample/hello.bf delete mode 100644 core/deps/xbyak/sample/jmp_table.cpp delete mode 100644 core/deps/xbyak/sample/memfunc.cpp delete mode 100644 core/deps/xbyak/sample/profiler.cpp delete mode 100644 core/deps/xbyak/sample/protect-re.cpp delete mode 100644 core/deps/xbyak/sample/quantize.cpp delete mode 100644 core/deps/xbyak/sample/quantize.vcxproj delete mode 100644 core/deps/xbyak/sample/stackframe.cpp delete mode 100644 core/deps/xbyak/sample/static_buf.cpp delete mode 100644 core/deps/xbyak/sample/test0.cpp delete mode 100644 core/deps/xbyak/sample/test0.vcxproj delete mode 100644 core/deps/xbyak/sample/test_util.cpp delete mode 100644 core/deps/xbyak/sample/test_util.vcxproj delete mode 100644 core/deps/xbyak/sample/toyvm.cpp delete mode 100644 core/deps/xbyak/sample/toyvm.vcxproj delete mode 100644 core/deps/xbyak/test/Makefile delete mode 100644 core/deps/xbyak/test/Makefile.win delete mode 100644 core/deps/xbyak/test/a.bat delete mode 100644 core/deps/xbyak/test/address.cpp delete mode 100644 core/deps/xbyak/test/bad_address.cpp delete mode 100644 core/deps/xbyak/test/cvt_test.cpp delete mode 100644 core/deps/xbyak/test/cybozu/inttype.hpp delete mode 100644 core/deps/xbyak/test/cybozu/test.hpp delete mode 100644 core/deps/xbyak/test/jmp.cpp delete mode 100644 core/deps/xbyak/test/jmp.sln delete mode 100644 core/deps/xbyak/test/jmp.vcproj delete mode 100644 core/deps/xbyak/test/lib.h delete mode 100644 core/deps/xbyak/test/lib_min.cpp delete mode 100644 core/deps/xbyak/test/lib_run.cpp delete mode 100644 core/deps/xbyak/test/lib_test.cpp delete mode 100644 core/deps/xbyak/test/make_512.cpp delete mode 100644 core/deps/xbyak/test/make_nm.cpp delete mode 100644 core/deps/xbyak/test/misc.cpp delete mode 100644 core/deps/xbyak/test/mprotect_test.cpp delete mode 100644 core/deps/xbyak/test/nm_frame.cpp delete mode 100644 core/deps/xbyak/test/noexception.cpp delete mode 100644 core/deps/xbyak/test/normalize_prefix.cpp delete mode 100644 core/deps/xbyak/test/readme.txt delete mode 100644 core/deps/xbyak/test/rip-label-imm.cpp delete mode 100644 core/deps/xbyak/test/set_opt.bat delete mode 100644 core/deps/xbyak/test/sf_test.cpp delete mode 100644 core/deps/xbyak/test/state.pptx delete mode 100644 core/deps/xbyak/test/test_address.bat delete mode 100644 core/deps/xbyak/test/test_address.sh delete mode 100644 core/deps/xbyak/test/test_all.bat delete mode 100644 core/deps/xbyak/test/test_avx.bat delete mode 100644 core/deps/xbyak/test/test_avx.sh delete mode 100644 core/deps/xbyak/test/test_avx512.bat delete mode 100644 core/deps/xbyak/test/test_avx512.sh delete mode 100644 core/deps/xbyak/test/test_avx_all.bat delete mode 100644 core/deps/xbyak/test/test_jmp.bat delete mode 100644 core/deps/xbyak/test/test_misc.bat delete mode 100644 core/deps/xbyak/test/test_mmx.cpp delete mode 100644 core/deps/xbyak/test/test_nm.bat delete mode 100644 core/deps/xbyak/test/test_nm.sh delete mode 100644 core/deps/xbyak/test/test_nm_all.bat delete mode 100644 core/deps/xbyak/xbyak.sln delete mode 100644 core/deps/xbyak/xbyak/xbyak.h delete mode 100644 core/deps/xbyak/xbyak/xbyak_bin2hex.h delete mode 100644 core/deps/xbyak/xbyak/xbyak_mnemonic.h delete mode 100644 core/deps/xbyak/xbyak/xbyak_util.h create mode 160000 external/xbyak diff --git a/.gitmodules b/.gitmodules index e594267cdf..7a62c154de 100644 --- a/.gitmodules +++ b/.gitmodules @@ -69,3 +69,6 @@ [submodule "external/vixl"] path = external/vixl url = https://github.com/Linaro/vixl.git +[submodule "external/xbyak"] + path = external/xbyak + url = https://github.com/herumi/xbyak.git diff --git a/CMakeLists.txt b/CMakeLists.txt index f0eeb0a8ce..e353dd4a38 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1453,7 +1453,7 @@ if("arm64" IN_LIST ARCHITECTURE) target_link_libraries(${PROJECT_NAME} PRIVATE libvixl) endif() if("x86" IN_LIST ARCHITECTURE OR "x86_64" IN_LIST ARCHITECTURE) - add_subdirectory(core/deps/xbyak EXCLUDE_FROM_ALL) + add_subdirectory(external/xbyak EXCLUDE_FROM_ALL) target_link_libraries(${PROJECT_NAME} PRIVATE xbyak::xbyak) if(CMAKE_SIZEOF_VOID_P EQUAL 4) target_sources(${PROJECT_NAME} PRIVATE diff --git a/core/deps/xbyak/.github/workflows/main.yml b/core/deps/xbyak/.github/workflows/main.yml deleted file mode 100644 index 3fad142205..0000000000 --- a/core/deps/xbyak/.github/workflows/main.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: test -on: [push] - -jobs: - build: - name: test - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - run: sudo apt install nasm yasm g++-multilib tcsh - - run: make test diff --git a/core/deps/xbyak/.gitignore b/core/deps/xbyak/.gitignore deleted file mode 100644 index 24b0b1de5b..0000000000 --- a/core/deps/xbyak/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/build* # cmake diff --git a/core/deps/xbyak/CMakeLists.txt b/core/deps/xbyak/CMakeLists.txt deleted file mode 100644 index f2c54f1412..0000000000 --- a/core/deps/xbyak/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -cmake_minimum_required(VERSION 2.6...3.0.2) - -project(xbyak CXX) - -file(GLOB headers xbyak/*.h) - -if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2) - include(GNUInstallDirs) - add_library(${PROJECT_NAME} INTERFACE) - add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) - - target_include_directories( - ${PROJECT_NAME} INTERFACE - "$" - "$" - ) - - install( - TARGETS ${PROJECT_NAME} - EXPORT ${PROJECT_NAME}-targets - ) - - configure_file( - cmake/config.cmake.in - ${PROJECT_NAME}Config.cmake - @ONLY - ) - - install( - FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} - ) - - install( - EXPORT ${PROJECT_NAME}-targets - NAMESPACE ${PROJECT_NAME}:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} - ) -elseif(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR) - set(CMAKE_INSTALL_INCLUDEDIR "include") -endif() - -install( - FILES ${headers} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/xbyak -) diff --git a/core/deps/xbyak/COPYRIGHT b/core/deps/xbyak/COPYRIGHT deleted file mode 100644 index 66b6ea55d0..0000000000 --- a/core/deps/xbyak/COPYRIGHT +++ /dev/null @@ -1,47 +0,0 @@ - -Copyright (c) 2007 MITSUNARI Shigeo -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. -Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. -Neither the name of the copyright owner nor the names of its contributors may -be used to endorse or promote products derived from this software without -specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た -す場合に限り、再頒布および使用が許可されます。 - -ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項 -を含めること。 -バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作 -権表示、本条件一覧、および下記免責条項を含めること。 -書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進 -に、著作権者の名前またはコントリビューターの名前を使用してはならない。 -本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ -れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性 -に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。 -著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを -問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で -あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、 -本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の -喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接 -損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、 -一切責任を負わないものとします。 diff --git a/core/deps/xbyak/Makefile b/core/deps/xbyak/Makefile deleted file mode 100644 index f91f6261bd..0000000000 --- a/core/deps/xbyak/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -PREFIX?=/usr/local -INSTALL_DIR=$(PREFIX)/include/xbyak - -all: - $(MAKE) -C sample - -clean: - $(MAKE) -C sample clean - -install: - mkdir -p $(INSTALL_DIR) - cp -pR xbyak/*.h $(INSTALL_DIR) - -uninstall: - rm -i $(INSTALL_DIR)/*.h - rmdir $(INSTALL_DIR) - -update: - $(MAKE) -C gen - -test: - $(MAKE) -C test test - -.PHONY: test update diff --git a/core/deps/xbyak/cmake/config.cmake.in b/core/deps/xbyak/cmake/config.cmake.in deleted file mode 100644 index f40ebfa888..0000000000 --- a/core/deps/xbyak/cmake/config.cmake.in +++ /dev/null @@ -1 +0,0 @@ -include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake") diff --git a/core/deps/xbyak/gen/Makefile b/core/deps/xbyak/gen/Makefile deleted file mode 100644 index 53d1a948c1..0000000000 --- a/core/deps/xbyak/gen/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -TARGET=../xbyak/xbyak_mnemonic.h -BIN=sortline gen_code gen_avx512 -CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers -all: $(TARGET) -sortline: sortline.cpp - $(CXX) $(CFLAGS) $< -o $@ -gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp - $(CXX) $(CFLAGS) $< -o $@ -gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp - $(CXX) $(CFLAGS) $< -o $@ - -$(TARGET): $(BIN) - ./gen_code | ./sortline > $@ - echo "#ifdef XBYAK_ENABLE_OMITTED_OPERAND" >> $@ - ./gen_code omit | ./sortline >> $@ - echo "#endif" >>$@ - ./gen_code fixed >> $@ - echo "#ifndef XBYAK_DISABLE_AVX512" >> $@ - ./gen_avx512 | ./sortline >> $@ - echo "#ifdef XBYAK64" >> $@ - ./gen_avx512 64 | ./sortline >> $@ - echo "#endif" >> $@ - echo "#endif" >> $@ - -clean: - $(RM) $(BIN) $(TARGET) diff --git a/core/deps/xbyak/gen/avx_type.hpp b/core/deps/xbyak/gen/avx_type.hpp deleted file mode 100644 index a659699e52..0000000000 --- a/core/deps/xbyak/gen/avx_type.hpp +++ /dev/null @@ -1,170 +0,0 @@ -#include -// copy CodeGenerator::AVXtype - enum AVXtype { - // low 3 bit - T_N1 = 1, - T_N2 = 2, - T_N4 = 3, - T_N8 = 4, - T_N16 = 5, - T_N32 = 6, - T_NX_MASK = 7, - // - T_N_VL = 1 << 3, // N * (1, 2, 4) for VL - T_DUP = 1 << 4, // N = (8, 32, 64) - T_66 = 1 << 5, - T_F3 = 1 << 6, - T_F2 = 1 << 7, - T_0F = 1 << 8, - T_0F38 = 1 << 9, - T_0F3A = 1 << 10, - T_L0 = 1 << 11, - T_L1 = 1 << 12, - T_W0 = 1 << 13, - T_W1 = 1 << 14, - T_EW0 = 1 << 15, - T_EW1 = 1 << 16, - T_YMM = 1 << 17, // support YMM, ZMM - T_EVEX = 1 << 18, - T_ER_X = 1 << 19, // xmm{er} - T_ER_Y = 1 << 20, // ymm{er} - T_ER_Z = 1 << 21, // zmm{er} - T_SAE_X = 1 << 22, // xmm{sae} - T_SAE_Y = 1 << 23, // ymm{sae} - T_SAE_Z = 1 << 24, // zmm{sae} - T_MUST_EVEX = 1 << 25, // contains T_EVEX - T_B32 = 1 << 26, // m32bcst - T_B64 = 1 << 27, // m64bcst - T_M_K = 1 << 28, // mem{k} - T_VSIB = 1 << 29, - T_MEM_EVEX = 1 << 30, // use evex if mem - T_XXX - }; - -const int NONE = 256; // same as Xbyak::CodeGenerator::NONE - -std::string type2String(int type) -{ - std::string str; - int low = type & T_NX_MASK; - if (0 < low) { - const char *tbl[8] = { - "T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32" - }; - assert(low < int(sizeof(tbl) / sizeof(tbl[0]))); - str = tbl[low - 1]; - } - if (type & T_N_VL) { - if (!str.empty()) str += " | "; - str += "T_N_VL"; - } - if (type & T_DUP) { - if (!str.empty()) str += " | "; - str += "T_DUP"; - } - if (type & T_66) { - if (!str.empty()) str += " | "; - str += "T_66"; - } - if (type & T_F3) { - if (!str.empty()) str += " | "; - str += "T_F3"; - } - if (type & T_F2) { - if (!str.empty()) str += " | "; - str += "T_F2"; - } - if (type & T_0F) { - if (!str.empty()) str += " | "; - str += "T_0F"; - } - if (type & T_0F38) { - if (!str.empty()) str += " | "; - str += "T_0F38"; - } - if (type & T_0F3A) { - if (!str.empty()) str += " | "; - str += "T_0F3A"; - } - if (type & T_L0) { - if (!str.empty()) str += " | "; - str += "VEZ_L0"; - } - if (type & T_L1) { - if (!str.empty()) str += " | "; - str += "VEZ_L1"; - } - if (type & T_W0) { - if (!str.empty()) str += " | "; - str += "T_W0"; - } - if (type & T_W1) { - if (!str.empty()) str += " | "; - str += "T_W1"; - } - if (type & T_EW0) { - if (!str.empty()) str += " | "; - str += "T_EW0"; - } - if (type & T_EW1) { - if (!str.empty()) str += " | "; - str += "T_EW1"; - } - if (type & T_YMM) { - if (!str.empty()) str += " | "; - str += "T_YMM"; - } - if (type & T_EVEX) { - if (!str.empty()) str += " | "; - str += "T_EVEX"; - } - if (type & T_ER_X) { - if (!str.empty()) str += " | "; - str += "T_ER_X"; - } - if (type & T_ER_Y) { - if (!str.empty()) str += " | "; - str += "T_ER_Y"; - } - if (type & T_ER_Z) { - if (!str.empty()) str += " | "; - str += "T_ER_Z"; - } - if (type & T_SAE_X) { - if (!str.empty()) str += " | "; - str += "T_SAE_X"; - } - if (type & T_SAE_Y) { - if (!str.empty()) str += " | "; - str += "T_SAE_Y"; - } - if (type & T_SAE_Z) { - if (!str.empty()) str += " | "; - str += "T_SAE_Z"; - } - if (type & T_MUST_EVEX) { - if (!str.empty()) str += " | "; - str += "T_MUST_EVEX"; - } - if (type & T_B32) { - if (!str.empty()) str += " | "; - str += "T_B32"; - } - if (type & T_B64) { - if (!str.empty()) str += " | "; - str += "T_B64"; - } - if (type & T_M_K) { - if (!str.empty()) str += " | "; - str += "T_M_K"; - } - if (type & T_VSIB) { - if (!str.empty()) str += " | "; - str += "T_VSIB"; - } - if (type & T_MEM_EVEX) { - if (!str.empty()) str += " | "; - str += "T_MEM_EVEX"; - } - return str; -} diff --git a/core/deps/xbyak/gen/b2hex.cpp b/core/deps/xbyak/gen/b2hex.cpp deleted file mode 100644 index 150ade8b85..0000000000 --- a/core/deps/xbyak/gen/b2hex.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include - -int main() -{ - puts("enum {"); - for (int i = 0; i < 256; i++) { - printf(" B"); - for (int j = 0; j < 8; j++) { - putchar(i & (1 << (7 - j)) ? '1' : '0'); - } - printf("= %d", i); - if (i < 255) putchar(','); - putchar('\n'); - } - puts("};"); - return 0; -} \ No newline at end of file diff --git a/core/deps/xbyak/gen/gen_avx512.cpp b/core/deps/xbyak/gen/gen_avx512.cpp deleted file mode 100644 index b2b88c3fd3..0000000000 --- a/core/deps/xbyak/gen/gen_avx512.cpp +++ /dev/null @@ -1,750 +0,0 @@ -#define XBYAK_DONT_READ_LIST -#include -#include -#include "../xbyak/xbyak.h" -#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) - -using namespace Xbyak; -#ifdef _MSC_VER - #pragma warning(disable : 4996) // scanf - #define snprintf _snprintf_s -#endif - -#include "avx_type.hpp" - -void putOpmask(bool only64bit) -{ - if (only64bit) { - puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }"); - puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }"); - return; - } - - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "kadd", 0x4A }, - { "kand", 0x41 }, - { "kandn", 0x42 }, - { "kor", 0x45 }, - { "kxnor", 0x46 }, - { "kxor", 0x47 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code); - printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code); - printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); - printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); - } - printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n"); - printf("void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); }\n"); - printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n"); - } - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "knot", 0x44 }, - { "kortest", 0x98 }, - { "ktest", 0x99 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code); - printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code); - printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); - printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); - } - } - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "kshiftl", 0x32 }, - { "kshiftr", 0x30 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); - printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); - printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); - printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); - } - } - puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }"); - puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }"); - puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }"); - puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }"); - - puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }"); - puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }"); - puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }"); - puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }"); - - puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }"); - puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }"); - puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }"); - puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }"); - puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }"); - puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }"); -} - -// vcmppd(k, x, op) -void putVcmp() -{ - const struct Tbl { - uint8_t code; - const char *name; - int type; - bool hasIMM; - } tbl[] = { - { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true }, - { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true }, - { 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true }, - { 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true }, - - { 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, - { 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, - { 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false }, - { 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - - { 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, - { 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, - { 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, - { 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - - { 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, - { 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, - - { 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, - { 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, - { 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, - { 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, - { 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, - { 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, - - { 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, - { 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, - { 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, - { 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - - { 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, - { 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, - { 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, - { 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); - } -} - -// XM_X -void putX_XM() -{ - const struct Tbl { - uint8_t code; - const char *name; - int type; - } tbl[] = { - { 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, - { 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, - { 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, - { 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, - { 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, - { 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, - { 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, - // putCvt - { 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, - { 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z }, - { 0xE6, "vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, - { 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, - { 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, - { 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, - { 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z }, - { 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z }, - - { 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, - { 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, - - { 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, - { 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, - { 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, - { 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); - } - puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }"); - - puts("void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }"); - puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }"); - puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }"); - puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }"); - - puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }"); - puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }"); -} - -void putM_X() -{ - const struct Tbl { - uint8_t code; - const char *name; - int type; - } tbl[] = { - { 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, - { 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, - { 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, - { 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, - { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, - { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); - } -} - -void putXM_X() -{ - const struct Tbl { - uint8_t code; - const char *name; - int type; - } tbl[] = { - { 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, - { 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, - - { 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, - { 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, - - { 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 }, - { 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); - } -} - -void putX_X_XM_IMM() -{ - const struct Tbl { - uint8_t code; - const char *name; - int type; - bool hasIMM; - } tbl[] = { - { 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true }, - { 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true }, - { 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, - { 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, - { 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, - { 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, - { 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false }, - { 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, - { 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, - { 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, - { 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, - { 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - - { 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, - { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - - { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - - { 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, - { 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, - - { 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - { 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, - { 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, - { 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, - { 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, - { 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - { 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, - { 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, - { 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - { 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true }, - { 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true }, - - { 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false }, - { 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false }, - { 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, - { 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, - - { 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true }, - { 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true }, - { 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true }, - { 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true }, - - { 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false }, - { 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false }, - - { 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false }, - { 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false }, - - { 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true }, - { 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true }, - - { 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false }, - { 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false }, - { 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false }, - { 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false }, - - { 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true }, - { 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false }, - { 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false }, - - { 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false }, - { 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false }, - - { 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, - { 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, - { 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, - { 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, - - { 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, - { 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, - - { 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - { 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, - { 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true }, - { 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true }, - - { 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, - { 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - { 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, - - { 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, - { 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true }, - { 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true }, - - { 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, - { 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - { 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, - - { 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - { 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); - } -} - -void putShift() -{ - const struct Tbl { - const char *name; - uint8_t code; - int idx; - int type; - } tbl[] = { - { "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 }, - { "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 }, - { "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 }, - { "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 }, - { "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); - } -} - -void putExtractInsert() -{ - { - const struct Tbl { - const char *name; - uint8_t code; - int type; - bool isZMM; - } tbl[] = { - { "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, - { "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, - { "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, - { "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, - - { "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, - { "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, - { "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, - { "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM"; - printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code); - } - } - { - const struct Tbl { - const char *name; - uint8_t code; - int type; - bool isZMM; - } tbl[] = { - { "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, - { "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, - { "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, - { "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, - - { "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, - { "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, - { "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, - { "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - const char *x = p.isZMM ? "Zmm" : "Ymm"; - const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))"; - printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {" - "if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) " - "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code); - } - } -} - -void putBroadcast(bool only64bit) -{ - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - int reg; - } tbl[] = { - { 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 }, - { 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 }, - { 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 }, - { 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64}, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) { - printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code); - } - } - } - if (only64bit) return; - puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }"); - puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }"); - puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }"); - puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }"); - puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }"); - - puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }"); - puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }"); - puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }"); - puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }"); - puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }"); -} - -void putCvt() -{ - puts("void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }"); - puts("void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }"); - puts("void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }"); - puts("void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }"); - puts("void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }"); - puts("void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }"); - puts("void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }"); - puts("void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }"); - - puts("void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }"); - puts("void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }"); - puts("void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }"); - puts("void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }"); - puts("void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }"); - puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }"); - puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }"); -} - -enum { // same as xbyak.h - xx_yy_zz = 0, - xx_yx_zy = 1, - xx_xy_yz = 2, -}; -void putGather() -{ - const struct Tbl { - const char *name; - int type; - uint8_t code; - int mode; - } tbl[] = { - { "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz }, - { "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy }, - { "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz }, - { "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz }, - { "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz }, - { "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy }, - { "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz }, - { "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type | T_VSIB); - printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode); - } -} -void putScatter() -{ - const struct Tbl { - const char *name; - int type; - uint8_t code; - int mode; // reverse of gather - } tbl[] = { - { "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz }, - { "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy }, - { "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz }, - { "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz }, - - { "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz }, - { "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy }, - { "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz }, - { "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type | T_VSIB); - printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode); - } -} - -void putShuff() -{ - puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }"); - puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }"); - puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }"); - puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); -} - -void putMov() -{ - puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }"); - puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }"); - puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }"); - puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }"); - - puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }"); - puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }"); - puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }"); - puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }"); - - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - int mode; - } tbl[] = { - { 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false }, - { 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false }, - { 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false }, - - { 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, - { 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, - { 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, - - { 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - { 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - { 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - - { 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, - { 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, - { 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, - - { 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - { 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - { 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - - { 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - { 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - { 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false"); - } - } -} - -void putX_XM_IMM() -{ - const struct Tbl { - uint8_t code; - const char *name; - int type; - bool hasIMM; - } tbl[] = { - { 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, - { 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, - { 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - { 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - - { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - - { 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true }, - { 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true }, - - { 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, - { 0x44, "vplzcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, - - { 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, - { 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, - - { 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false }, - { 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, - { 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - { 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, - - { 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false }, - { 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); - } -} - -void putMisc() -{ - puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }"); - puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }"); - { - const struct Tbl { - const char *name; - int zm; - int type; - uint8_t code; - bool isZmm; - } tbl[] = { - { "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true }, - { "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true }, - { "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false }, - { "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true }, - - { "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true }, - { "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true }, - { "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false }, - { "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true }, - - { "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true }, - { "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true }, - { "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false }, - { "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true }, - - { "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true }, - { "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true }, - { "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false }, - { "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB); - printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n" - , p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM"); - } - } - - puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); - puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); - puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); - puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); - - puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }"); - puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }"); - - puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }"); - puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }"); -} - -void putV4FMA() -{ - puts("void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }"); - puts("void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }"); - puts("void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }"); - puts("void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }"); - puts("void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }"); - puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }"); -} - -int main(int argc, char *[]) -{ - bool only64bit = argc == 2; - putOpmask(only64bit); - putBroadcast(only64bit); - if (only64bit) { - return 0; - } - putVcmp(); - putX_XM(); - putM_X(); - putXM_X(); - putX_X_XM_IMM(); - putShift(); - putExtractInsert(); - putCvt(); - putGather(); - putShuff(); - putMov(); - putX_XM_IMM(); - putMisc(); - putScatter(); - putV4FMA(); -} diff --git a/core/deps/xbyak/gen/gen_code.cpp b/core/deps/xbyak/gen/gen_code.cpp deleted file mode 100644 index f3b23e85e5..0000000000 --- a/core/deps/xbyak/gen/gen_code.cpp +++ /dev/null @@ -1,1929 +0,0 @@ -#define XBYAK_DONT_READ_LIST -#include -#include -#include "xbyak/xbyak.h" -#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) - -using namespace Xbyak; -#ifdef _MSC_VER - #pragma warning(disable : 4996) // scanf - #define snprintf _snprintf_s -#endif - -#include "avx_type.hpp" -/* - reg = cx/ecx/rcx - insert 0x67 if prefix is true -*/ -void put_jREGz(const char *reg, bool prefix) -{ - printf("void j%sz(std::string label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? "db(0x67); " : ""); - printf("void j%sz(const Label& label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? "db(0x67); " : ""); -} - -struct GenericTbl { - const char *name; - uint8_t code1; - uint8_t code2; - uint8_t code3; - uint8_t code4; -}; - -void putGeneric(const GenericTbl *p, size_t n) -{ - for (size_t i = 0; i < n; i++) { - printf("void %s() { db(0x%02X); ", p->name, p->code1); - if (p->code2) printf("db(0x%02X); ", p->code2); - if (p->code3) printf("db(0x%02X); ", p->code3); - if (p->code4) printf("db(0x%02X); ", p->code4); - printf("}\n"); - p++; - } -} - -void putX_X_XM(bool omitOnly) -{ - // (x, x, x/m[, imm]) or (y, y, y/m[, imm]) - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - bool hasIMM; - bool enableOmit; - int mode; // 1 : sse, 2 : avx, 3 : sse + avx - } tbl[] = { - { 0x0D, "blendpd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, - { 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, - { 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 }, - { 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, - { 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, - { 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, - { 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 }, - { 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 }, - { 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 }, - { 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0 | T_YMM | T_EVEX, true, true, 3 }, - { 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, - { 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 }, - - { 0x47, "psllvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, - { 0x47, "psllvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 }, - { 0x46, "psravd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, - { 0x45, "psrlvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, - { 0x45, "psrlvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 }, - - { 0xC2, "cmppd", T_0F | T_66 | T_YMM, true, true, 2 }, - { 0xC2, "cmpps", T_0F | T_YMM, true, true, 2 }, - { 0xC2, "cmpsd", T_0F | T_F2, true, true, 2 }, - { 0xC2, "cmpss", T_0F | T_F3, true, true, 2 }, - { 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 }, - { 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 }, - { 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, true, true, 2 }, - { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, - { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, - - { 0xFC, "paddb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xFD, "paddw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xFE, "paddd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, - { 0xD4, "paddq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, - - { 0xEC, "paddsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xED, "paddsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - - { 0xDC, "paddusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xDD, "paddusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - - { 0x0F, "palignr", T_0F3A | T_66 | T_YMM | T_EVEX, true, true, 2 }, - - { 0xDB, "pand", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0xDF, "pandn", T_0F | T_66 | T_YMM, false, true, 2 }, - - { 0xE0, "pavgb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xE3, "pavgw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - - { 0x74, "pcmpeqb", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0x75, "pcmpeqw", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0x76, "pcmpeqd", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0x29, "pcmpeqq", T_0F38 | T_66 | T_YMM, false, true, 3 }, - - { 0x64, "pcmpgtb", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0x65, "pcmpgtw", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0x66, "pcmpgtd", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0x37, "pcmpgtq", T_0F38 | T_66 | T_YMM, false, true, 3 }, - - { 0x01, "phaddw", T_0F38 | T_66 | T_YMM, false, true, 2 }, - { 0x02, "phaddd", T_0F38 | T_66 | T_YMM, false, true, 2 }, - { 0x03, "phaddsw", T_0F38 | T_66 | T_YMM, false, true, 2 }, - - { 0x05, "phsubw", T_0F38 | T_66 | T_YMM, false, true, 2 }, - { 0x06, "phsubd", T_0F38 | T_66 | T_YMM, false, true, 2 }, - { 0x07, "phsubsw", T_0F38 | T_66 | T_YMM, false, true, 2 }, - { 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 }, - - { 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, - { 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, - - { 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, - { 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, - - { 0x38, "pminsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, - { 0xEA, "pminsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x39, "pminsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, - - { 0xDA, "pminub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, - { 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, - - { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, - - { 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, - { 0x28, "pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 3 }, - - { 0xEB, "por", T_0F | T_66 | T_YMM, false, true, 2 }, - { 0xF6, "psadbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - - { 0x00, "pshufb", T_0F38 | T_66 | T_YMM | T_EVEX, false, false, 2 }, - - { 0x08, "psignb", T_0F38 | T_66 | T_YMM, false, true, 2 }, - { 0x09, "psignw", T_0F38 | T_66 | T_YMM, false, true, 2 }, - { 0x0A, "psignd", T_0F38 | T_66 | T_YMM, false, true, 2 }, - - { 0xF1, "psllw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, - { 0xF2, "pslld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, - { 0xF3, "psllq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 }, - - { 0xE1, "psraw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, - { 0xE2, "psrad", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, - { 0xD1, "psrlw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, - { 0xD2, "psrld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, - { 0xD3, "psrlq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 }, - - { 0xF8, "psubb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xF9, "psubw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xFA, "psubd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, - { 0xFB, "psubq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, - - { 0xE8, "psubsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xE9, "psubsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - - { 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - - { 0x68, "punpckhbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x69, "punpckhwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x6A, "punpckhdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, - { 0x6D, "punpckhqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, - - { 0x60, "punpcklbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x61, "punpcklwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, - { 0x62, "punpckldq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, - { 0x6C, "punpcklqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, - - { 0xEF, "pxor", T_0F | T_66 | T_YMM, false, true, 2 }, - - { 0x53, "rcpss", T_0F | T_F3, false, true, 2 }, - { 0x52, "rsqrtss", T_0F | T_F3, false, true, 2 }, - - { 0xC6, "shufpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, true, 2 }, - { 0xC6, "shufps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, true, true, 2 }, - - { 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X | T_N8, false, true, 2 }, - { 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X | T_N4, false, true, 2 }, - - { 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, - { 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, - - { 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, - { 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, - - { 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 }, - { 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 }, - { 0xCF, "gf2p8mulb", T_66 | T_0F38 | T_W0 | T_EVEX | T_YMM | T_EW0 | T_SAE_Z, false, false, 3 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - if (omitOnly) { - if (p->enableOmit) { - printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? ", imm" : ""); - } - } else { - if (p->mode & 1) { - if (p->hasIMM) { - printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); }\n", p->name, p->code); - } else { - printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, NONE, 0x38); }\n", p->name, p->code); - } - } - if (p->mode & 2) { - printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); - } - } - } - } -} - -void putMemOp(const char *name, uint8_t prefix, uint8_t ext, uint8_t code1, int code2, int bit = 32) -{ - printf("void %s(const Address& addr) { ", name); - if (prefix) printf("db(0x%02X); ", prefix); - printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2); -} - -void putLoadSeg(const char *name, uint8_t code1, int code2 = NONE) -{ - printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2); -} - -void put() -{ - const int NO = CodeGenerator::NONE; - { - char buf[16]; - unsigned int v = VERSION; - if (v & 0xF) { - snprintf(buf, sizeof(buf), "%d.%02X%x", v >> 12, (v >> 4) & 0xFF, v & 0xF); - } else { - snprintf(buf, sizeof(buf), "%d.%02X", v >> 12, (v >> 4) & 0xFF); - } - printf("const char *getVersionString() const { return \"%s\"; }\n", buf); - } - const int B = 1 << 0; - const int W = 1 << 1; - const int D = 1 << 2; - const int Q = 1 << 3; - { - const struct Tbl { - uint8_t code; - const char *name; - } tbl[] = { - // MMX - { 0x6B, "packssdw" }, - { 0x63, "packsswb" }, - { 0x67, "packuswb" }, - - { 0xDB, "pand" }, - { 0xDF, "pandn" }, - - { 0xF5, "pmaddwd" }, - { 0xE4, "pmulhuw" }, - { 0xE5, "pmulhw" }, - { 0xD5, "pmullw" }, - - { 0xEB, "por" }, - - { 0x68, "punpckhbw" }, - { 0x69, "punpckhwd" }, - { 0x6A, "punpckhdq" }, - - { 0x60, "punpcklbw" }, - { 0x61, "punpcklwd" }, - { 0x62, "punpckldq" }, - - { 0xEF, "pxor" }, - - // MMX2 - { 0xE0, "pavgb" }, - { 0xE3, "pavgw" }, - { 0xEE, "pmaxsw" }, - { 0xDE, "pmaxub" }, - { 0xEA, "pminsw" }, - { 0xDA, "pminub" }, - { 0xF6, "psadbw" }, - // - { 0xD4, "paddq" }, - { 0xF4, "pmuludq" }, - { 0xFB, "psubq" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n" - , p->name, p->code); - } - } - - { - const struct Tbl { - uint8_t code; - int mode; - const char *name; - } tbl[] = { - { 0xFC, B|W|D, "padd" }, - { 0xEC, B|W , "padds" }, - { 0xDC, B|W , "paddus" }, - { 0x74, B|W|D, "pcmpeq" }, - { 0x64, B|W|D, "pcmpgt" }, - { 0xF0, W|D|Q, "psll" }, - { 0xE0, W|D , "psra" }, - { 0xD0, W|D|Q, "psrl" }, - { 0xF8, B|W|D, "psub" }, - { 0xE8, B|W , "psubs" }, - { 0xD8, B|W , "psubus" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - static const char modTbl[][4] = { - "b", "w", "d", "q" - }; - for (int j = 0; j < 4; j++) { - // B(0), W(1), D(2), Q(3) - if (!(p->mode & (1 << j))) continue; - printf("void %s%s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n" - , p->name, modTbl[j] - , p->code | j - ); - } - } - } - - { - const struct Tbl { - uint8_t code; - int ext; - int mode; - const char *name; - } tbl[] = { - { 0x70, 6, W|D|Q, "psll" }, - { 0x70, 4, W|D , "psra" }, - { 0x70, 2, W|D|Q, "psrl" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - static const char modTbl[][4] = { - "b", "w", "d", "q" - }; - for (int j = 0; j < 4; j++) { - // B(0), W(1), D(2), Q(3) - if (!(p->mode & (1 << j))) continue; - printf("void %s%s(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x%02X, %d); }\n" - , p->name, modTbl[j] - , p->code | j - , p->ext - ); - } - } - printf("void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 7); - printf("void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 3); - } - - { - const struct Tbl { - uint8_t code; - uint8_t pref; - const char *name; - } tbl[] = { - { 0x70, 0, "pshufw" }, - { 0x70, 0xF2, "pshuflw" }, - { 0x70, 0xF3, "pshufhw" }, - { 0x70, 0x66, "pshufd" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); - } - } - { - const struct MmxTbl6 { - uint8_t code; // for (reg, reg/[mem]) - uint8_t code2; // for ([mem], reg) - int pref; - const char *name; - } mmxTbl6[] = { - { 0x6F, 0x7F, 0x66, "movdqa" }, - { 0x6F, 0x7F, 0xF3, "movdqu" }, - // SSE2 - { 0x28, 0x29, NO, "movaps" }, - { 0x10, 0x11, 0xF3, "movss" }, - { 0x10, 0x11, NO, "movups" }, - { 0x28, 0x29, 0x66, "movapd" }, - { 0x10, 0x11, 0xF2, "movsd" }, - { 0x10, 0x11, 0x66, "movupd" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) { - const MmxTbl6 *p = &mmxTbl6[i]; - printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref); - printf("void %s(const Address& addr, const Xmm& xmm) { ", p->name); - if (p->pref != NO) printf("db(0x%02X); ", p->pref); - printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n", p->code2); - } - } - { - enum { - PS = 1 << 0, - SS = 1 << 1, - PD = 1 << 2, - SD = 1 << 3 - }; - const struct { - int code; - const char *name; - } sufTbl[] = { - { NO, "ps" }, - { 0xF3, "ss" }, - { 0x66, "pd" }, - { 0xF2, "sd" }, - }; - const struct Tbl { - uint8_t code; - int mode; - const char *name; - bool hasImm; - } tbl[] = { - { 0x58, PS|SS|PD|SD, "add" }, - { 0x55, PS|PD , "andn" }, - { 0x54, PS|PD , "and" }, - { 0xC2, PS|SS|PD|SD, "cmp", true }, - { 0x5E, PS|SS|PD|SD, "div" }, - { 0x5F, PS|SS|PD|SD, "max" }, - { 0x5D, PS|SS|PD|SD, "min" }, - { 0x59, PS|SS|PD|SD, "mul" }, - { 0x56, PS|PD , "or" }, - { 0x53, PS|SS , "rcp" }, - { 0x52, PS|SS , "rsqrt" }, - { 0xC6, PS|PD , "shuf", true }, - { 0x51, PS|SS|PD|SD, "sqrt" }, - { 0x5C, PS|SS|PD|SD, "sub" }, - { 0x15, PS|PD , "unpckh" }, - { 0x14, PS|PD , "unpckl" }, - { 0x57, PS|PD , "xor" }, - // - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { - if (!(p->mode & (1 << j))) continue; - if (p->hasImm) { - // don't change uint8_t to int because NO is not in byte - printf("void %s%s(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); - } else { - printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); - } - } - } - } - { - // (XMM, XMM) - const struct Tbl { - uint8_t code; - uint8_t pref; - const char *name; - } tbl[] = { - { 0xF7, 0x66, "maskmovdqu" }, - { 0x12, 0 , "movhlps" }, - { 0x16, 0 , "movlhps" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Xmm& reg1, const Xmm& reg2) { ", p->name); - if (p->pref) printf("db(0x%02X); ", p->pref); - printf(" opModR(reg1, reg2, 0x0F, 0x%02X); }\n", p->code); - } - } - { - // (XMM, XMM|MEM) - const struct Tbl { - uint8_t code; - int pref; - const char *name; - } tbl[] = { - { 0x6D, 0x66, "punpckhqdq" }, - { 0x6C, 0x66, "punpcklqdq" }, - - { 0x2F, NO , "comiss" }, - { 0x2E, NO , "ucomiss" }, - { 0x2F, 0x66, "comisd" }, - { 0x2E, 0x66, "ucomisd" }, - - { 0x5A, 0x66, "cvtpd2ps" }, - { 0x5A, NO , "cvtps2pd" }, - { 0x5A, 0xF2, "cvtsd2ss" }, - { 0x5A, 0xF3, "cvtss2sd" }, - { 0xE6, 0xF2, "cvtpd2dq" }, - { 0xE6, 0x66, "cvttpd2dq" }, - { 0xE6, 0xF3, "cvtdq2pd" }, - { 0x5B, 0x66, "cvtps2dq" }, - { 0x5B, 0xF3, "cvttps2dq" }, - { 0x5B, NO , "cvtdq2ps" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code, p->pref); - } - } - - { - // special type - const struct Tbl { - uint8_t code; - int pref; - const char *name; - const char *cond; - } tbl[] = { - { 0x2A, NO , "cvtpi2ps", "isXMM_MMXorMEM" }, - { 0x2D, NO , "cvtps2pi", "isMMX_XMMorMEM" }, - { 0x2A, 0xF3, "cvtsi2ss", "isXMM_REG32orMEM" }, - { 0x2D, 0xF3, "cvtss2si", "isREG32_XMMorMEM" }, - { 0x2C, NO , "cvttps2pi", "isMMX_XMMorMEM" }, - { 0x2C, 0xF3, "cvttss2si", "isREG32_XMMorMEM" }, - { 0x2A, 0x66, "cvtpi2pd", "isXMM_MMXorMEM" }, - { 0x2D, 0x66, "cvtpd2pi", "isMMX_XMMorMEM" }, - { 0x2A, 0xF2, "cvtsi2sd", "isXMM_REG32orMEM" }, - { 0x2D, 0xF2, "cvtsd2si", "isREG32_XMMorMEM" }, - { 0x2C, 0x66, "cvttpd2pi", "isMMX_XMMorMEM" }, - { 0x2C, 0xF2, "cvttsd2si", "isREG32_XMMorMEM" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Operand& reg, const Operand& op) { opGen(reg, op, 0x%02X, 0x%02X, %s); }\n", p->name, p->code, p->pref, p->cond); - } - } - { - // prefetch - const struct Tbl { - int ext; - const char *name; - int code; - } tbl[] = { - { 1, "t0", 0x18}, - { 2, "t1", 0x18}, - { 3, "t2", 0x18}, - { 0, "nta", 0x18}, - { 2, "wt1", 0x0D}, - { 1, "w", 0x0D}, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void prefetch%s(const Address& addr) { opModM(addr, Reg32(%d), 0x0F, 0x%02X); }\n", p->name, p->ext, p->code); - } - } - { - const struct Tbl { - uint8_t code; - int pref; - const char *name; - } tbl[] = { - { 0x16, NO, "movhps" }, - { 0x12, NO, "movlps" }, - { 0x16, 0x66, "movhpd" }, - { 0x12, 0x66, "movlpd" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref); - } - } - { - // cmov - const struct Tbl { - uint8_t ext; - const char *name; - } tbl[] = { - { 0, "o" }, - { 1, "no" }, - { 2, "b" }, - { 2, "c" }, - { 2, "nae" }, - { 3, "nb" }, - { 3, "ae" }, - { 3, "nc" }, - { 4, "e" }, - { 4, "z" }, - { 5, "ne" }, - { 5, "nz" }, - { 6, "be" }, - { 6, "na" }, - { 7, "nbe" }, - { 7, "a" }, - { 8, "s" }, - { 9, "ns" }, - { 10, "p" }, - { 10, "pe" }, - { 11, "np" }, - { 11, "po" }, - { 12, "l" }, - { 12, "nge" }, - { 13, "nl" }, - { 13, "ge" }, - { 14, "le" }, - { 14, "ng" }, - { 15, "nle" }, - { 15, "g" }, - }; - const char *msg = "//-V524"; // disable warning of PVS-Studio - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void cmov%s(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | %d); }%s\n", p->name, p->ext, msg); - printf("void j%s(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); - printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); - printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg); - printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); - printf("void set%s(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | %d); }%s\n", p->name, p->ext, msg); - } - } - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "loop", 0xE2 }, - { "loope", 0xE1 }, - { "loopne", 0xE0 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(std::string label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); - printf("void %s(const Label& label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); - printf("void %s(const char *label) { %s(std::string(label)); }\n", p->name, p->name); - } - } - //////////////////////////////////////////////////////////////// - { - const GenericTbl tbl[] = { - { "bnd", 0xf2 }, /* 0xf2 prefix for MPX */ - { "cbw", 0x66, 0x98 }, - { "cdq", 0x99 }, - { "clc", 0xF8 }, - { "cld", 0xFC }, - { "cli", 0xFA }, - { "cmc", 0xF5 }, - - { "cpuid", 0x0F, 0xA2 }, - { "cwd", 0x66, 0x99 }, - { "cwde", 0x98 }, - { "cmpsb", 0xA6 }, - { "cmpsw", 0x66, 0xA7 }, - { "cmpsd", 0xA7 }, - { "endbr32", 0xF3, 0x0F, 0x1E, 0xFB }, - { "endbr64", 0xF3, 0x0F, 0x1E, 0xFA }, - { "int3", 0xCC }, - { "scasb", 0xAE }, - { "scasw", 0x66, 0xAF }, - { "scasd", 0xAF }, - { "movsb", 0xA4 }, - { "leave", 0xC9 }, - { "lodsb", 0xAC }, - { "lodsw", 0x66, 0xAD }, - { "lodsd", 0xAD }, - { "movsw", 0x66, 0xA5 }, - { "movsd", 0xA5 }, - { "outsb", 0x6E }, - { "outsw", 0x66, 0x6F }, - { "outsd", 0x6F }, - { "stosb", 0xAA }, - { "stosw", 0x66, 0xAB }, - { "stosd", 0xAB }, - { "rep", 0xF3 }, - { "repe", 0xF3 }, - { "repz", 0xF3 }, - { "repne", 0xF2 }, - { "repnz", 0xF2 }, - - { "lahf", 0x9F }, - { "lock", 0xF0 }, - - { "sahf", 0x9E }, - { "stc", 0xF9 }, - { "std", 0xFD }, - { "sti", 0xFB }, - { "sysenter", 0x0F, 0x34 }, - { "sysexit", 0x0F, 0x35 }, - - { "emms", 0x0F, 0x77 }, - { "pause", 0xF3, 0x90 }, - { "sfence", 0x0F, 0xAE, 0xF8 }, - { "lfence", 0x0F, 0xAE, 0xE8 }, - { "mfence", 0x0F, 0xAE, 0xF0 }, - { "monitor", 0x0F, 0x01, 0xC8 }, - { "mwait", 0x0F, 0x01, 0xC9 }, - - { "rdmsr", 0x0F, 0x32 }, - { "rdpmc", 0x0F, 0x33 }, - { "rdtsc", 0x0F, 0x31 }, - { "rdtscp", 0x0F, 0x01, 0xF9 }, - { "ud2", 0x0F, 0x0B }, - { "wait", 0x9B }, - { "fwait", 0x9B }, - { "wbinvd", 0x0F, 0x09 }, - { "wrmsr", 0x0F, 0x30 }, - { "xlatb", 0xD7 }, - - { "popf", 0x9D }, - { "pushf", 0x9C }, - { "stac", 0x0F, 0x01, 0xCB }, - - { "vzeroall", 0xC5, 0xFC, 0x77 }, - { "vzeroupper", 0xC5, 0xF8, 0x77 }, - { "xgetbv", 0x0F, 0x01, 0xD0 }, - - // FPU - { "f2xm1", 0xD9, 0xF0 }, - { "fabs", 0xD9, 0xE1 }, - { "faddp", 0xDE, 0xC1 }, - { "fchs", 0xD9, 0xE0 }, - { "fclex", 0x9B, 0xDB, 0xE2 }, - { "fnclex", 0xDB, 0xE2 }, - { "fcom", 0xD8, 0xD1 }, - { "fcomp", 0xD8, 0xD9 }, - { "fcompp", 0xDE, 0xD9 }, - { "fcos", 0xD9, 0xFF }, - { "fdecstp", 0xD9, 0xF6 }, - { "fdivp", 0xDE, 0xF9 }, - { "fdivrp", 0xDE, 0xF1 }, - { "fincstp", 0xD9, 0xF7 }, - { "finit", 0x9B, 0xDB, 0xE3 }, - { "fninit", 0xDB, 0xE3 }, - { "fld1", 0xD9, 0xE8 }, - { "fldl2t", 0xD9, 0xE9 }, - { "fldl2e", 0xD9, 0xEA }, - { "fldpi", 0xD9, 0xEB }, - { "fldlg2", 0xD9, 0xEC }, - { "fldln2", 0xD9, 0xED }, - { "fldz", 0xD9, 0xEE }, - { "fmulp", 0xDE, 0xC9 }, - { "fnop", 0xD9, 0xD0 }, - { "fpatan", 0xD9, 0xF3 }, - { "fprem", 0xD9, 0xF8 }, - { "fprem1", 0xD9, 0xF5 }, - { "fptan", 0xD9, 0xF2 }, - { "frndint", 0xD9, 0xFC }, - { "fscale", 0xD9, 0xFD }, - { "fsin", 0xD9, 0xFE }, - { "fsincos", 0xD9, 0xFB }, - { "fsqrt", 0xD9, 0xFA }, - { "fsubp", 0xDE, 0xE9 }, - { "fsubrp", 0xDE, 0xE1 }, - { "ftst", 0xD9, 0xE4 }, - { "fucom", 0xDD, 0xE1 }, - { "fucomp", 0xDD, 0xE9 }, - { "fucompp", 0xDA, 0xE9 }, - { "fxam", 0xD9, 0xE5 }, - { "fxch", 0xD9, 0xC9 }, - { "fxtract", 0xD9, 0xF4 }, - { "fyl2x", 0xD9, 0xF1 }, - { "fyl2xp1", 0xD9, 0xF9 }, - - // AMD Zen - { "monitorx", 0x0F, 0x01, 0xFA }, - { "mwaitx", 0x0F, 0x01, 0xFB }, - { "clzero", 0x0F, 0x01, 0xFC }, - }; - putGeneric(tbl, NUM_OF_ARRAY(tbl)); - puts("void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }"); - puts("void int_(uint8_t x) { db(0xCD); db(x); }"); - putLoadSeg("lss", 0x0F, 0xB2); - putLoadSeg("lfs", 0x0F, 0xB4); - putLoadSeg("lgs", 0x0F, 0xB5); - } - { - const struct Tbl { - uint8_t code; // (reg, reg) - uint8_t ext; // (reg, imm) - const char *name; - } tbl[] = { - { 0x10, 2, "adc" }, - { 0x00, 0, "add" }, - { 0x20, 4, "and_" }, - { 0x38, 7, "cmp" }, - { 0x08, 1, "or_" }, - { 0x18, 3, "sbb" }, - { 0x28, 5, "sub" }, - { 0x30, 6, "xor_" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code); - printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); - } - } - - { - const struct Tbl { - uint8_t code; - uint8_t ext; - const char *name; - } tbl[] = { - { 0x48, 1, "dec" }, - { 0x40, 0, "inc" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op) { opIncDec(op, 0x%02X, %d); }\n", p->name, p->code, p->ext); - } - } - { - const struct Tbl { - uint8_t code; - uint8_t ext; - const char *name; - } tbl[] = { - { 0xa3, 4, "bt" }, - { 0xab, 5, "bts" }, - { 0xb3, 6, "btr" }, - { 0xbb, 7, "btc" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0x%02X); }\n", p->name, p->code); - printf("void %s(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext); - } - } - { - const struct Tbl { - uint8_t code; - uint8_t ext; - const char *name; - } tbl[] = { - { 0xF6, 6, "div" }, - { 0xF6, 7, "idiv" }, - { 0xF6, 5, "imul" }, - { 0xF6, 4, "mul" }, - { 0xF6, 3, "neg" }, - { 0xF6, 2, "not_" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - const std::string name = p->name; - printf("void %s(const Operand& op) { opR_ModM(op, 0, %d, 0x%02X); }\n", p->name, p->ext, p->code); - } - } - { - const struct Tbl { - const char *name; - uint8_t ext; - } tbl[] = { - { "rcl", 2 }, - { "rcr", 3 }, - { "rol", 0 }, - { "ror", 1 }, - { "sar", 7 }, - { "shl", 4 }, - { "shr", 5 }, - - { "sal", 4 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op, int imm) { opShift(op, imm, %d); }\n", p->name, p->ext); - printf("void %s(const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d); }\n", p->name, p->ext); - } - } - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "shld", 0xA4 }, - { "shrd", 0xAC }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code); - printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0x%02X, &_cl); }\n", p->name, p->code); - } - } - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "bsf", 0xBC }, - { "bsr", 0xBD }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x%02X); }\n", p->name, p->code); - } - } - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "popcnt", 0xB8 }, - { "tzcnt", 0xBC }, - { "lzcnt", 0xBD }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0x%02X); }\n", p->name, p->code); - } - } - // SSSE3 - { - const struct Tbl { - uint8_t code; - const char *name; - } tbl[] = { - { 0x00, "pshufb" }, - { 0x01, "phaddw" }, - { 0x02, "phaddd" }, - { 0x03, "phaddsw" }, - { 0x04, "pmaddubsw" }, - { 0x05, "phsubw" }, - { 0x06, "phsubd" }, - { 0x07, "phsubsw" }, - { 0x08, "psignb" }, - { 0x09, "psignw" }, - { 0x0a, "psignd" }, - { 0x0b, "pmulhrsw" }, - { 0x1c, "pabsb" }, - { 0x1d, "pabsw" }, - { 0x1e, "pabsd" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, NONE, 0x38); }\n", p->name, p->code); - } - printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); }\n"); - } - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "pclmullqlqdq", 0 }, - { "pclmulhqlqdq", 1 }, - { "pclmullqhdq", 0x10 }, - { "pclmulhqhdq", 0x11 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x%02X); }\n", p->name, p->code); - } - } - { - const struct Tbl { - uint8_t code1; - int code2; - uint8_t ext; - const char *name; - uint8_t prefix; - } tbl[] = { - { 0x0F, 0xAE, 2, "ldmxcsr", 0 }, - { 0x0F, 0xAE, 3, "stmxcsr", 0 }, - { 0x0F, 0xAE, 7, "clflush", 0 }, - { 0x0F, 0xAE, 7, "clflushopt", 0x66 }, - { 0xDF, NONE, 4, "fbld", 0 }, - { 0xDF, NONE, 6, "fbstp", 0 }, - { 0xD9, NONE, 5, "fldcw", 0 }, - { 0xD9, NONE, 4, "fldenv", 0 }, - { 0xDD, NONE, 4, "frstor", 0 }, - { 0xDD, NONE, 6, "fsave", 0x9B }, - { 0xDD, NONE, 6, "fnsave", 0 }, - { 0xD9, NONE, 7, "fstcw", 0x9B }, - { 0xD9, NONE, 7, "fnstcw", 0 }, - { 0xD9, NONE, 6, "fstenv", 0x9B }, - { 0xD9, NONE, 6, "fnstenv", 0 }, - { 0xDD, NONE, 7, "fstsw", 0x9B }, - { 0xDD, NONE, 7, "fnstsw", 0 }, - { 0x0F, 0xAE, 1, "fxrstor", 0 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2); - } - puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }"); - puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }"); - } - { - const struct Tbl { - uint8_t code; - const char *name; - } tbl[] = { - { 0x2B, "movntpd" }, - { 0xE7, "movntdq" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - // cast xmm register to 16bit register to put 0x66 - printf("void %s(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x%02X); }\n", p->name, p->code); - } - } - { - const struct Tbl { - uint8_t code; - const char *name; - } tbl[] = { - { 0xBE, "movsx" }, - { 0xB6, "movzx" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code); - } - } - { // in/out - puts("void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }"); - puts("void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }"); - puts("void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }"); - puts("void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }"); - } - // mpx - { - puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); - puts("void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); - puts("void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }"); - puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, 0x0F, 0x1A); }"); - puts("void bndmk(const BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }"); - puts("void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }"); - puts("void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }"); - puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }"); - } - // misc - { - puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }"); - puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }"); - puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }"); - - puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }"); - puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }"); - puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }"); - puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }"); - puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }"); - puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }"); - puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }"); - - puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }"); - puts("void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }"); - puts("void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }"); - puts("void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }"); - puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }"); - puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }"); - puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); - puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); - - puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }"); - puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }"); - puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }"); - puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }"); - puts("void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }"); - puts("void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }"); - puts("void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }"); - puts("void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }"); - puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }"); - - puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }"); - puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); - puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x6E); }"); - puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); - puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); }"); - puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }"); - puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }"); - puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }"); - puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); - puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); - puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }"); - } - { - const struct Tbl { - uint8_t m16; - uint8_t m32; - uint8_t m64; - uint8_t ext; - const char *name; - uint8_t m64ext; - } tbl[] = { - { 0x00, 0xD8, 0xDC, 0, "fadd" }, - { 0xDE, 0xDA, 0x00, 0, "fiadd" }, - { 0x00, 0xD8, 0xDC, 2, "fcom" }, - { 0x00, 0xD8, 0xDC, 3, "fcomp" }, - { 0x00, 0xD8, 0xDC, 6, "fdiv" }, - { 0xDE, 0xDA, 0x00, 6, "fidiv" }, - { 0x00, 0xD8, 0xDC, 7, "fdivr" }, - { 0xDE, 0xDA, 0x00, 7, "fidivr" }, - { 0xDE, 0xDA, 0x00, 2, "ficom" }, - { 0xDE, 0xDA, 0x00, 3, "ficomp" }, - { 0xDF, 0xDB, 0xDF, 0, "fild", 5 }, - { 0xDF, 0xDB, 0x00, 2, "fist" }, - { 0xDF, 0xDB, 0xDF, 3, "fistp", 7 }, - { 0xDF, 0xDB, 0xDD, 1, "fisttp" }, - { 0x00, 0xD9, 0xDD, 0, "fld" }, - { 0x00, 0xD8, 0xDC, 1, "fmul" }, - { 0xDE, 0xDA, 0x00, 1, "fimul" }, - { 0x00, 0xD9, 0xDD, 2, "fst" }, - { 0x00, 0xD9, 0xDD, 3, "fstp" }, - { 0x00, 0xD8, 0xDC, 4, "fsub" }, - { 0xDE, 0xDA, 0x00, 4, "fisub" }, - { 0x00, 0xD8, 0xDC, 5, "fsubr" }, - { 0xDE, 0xDA, 0x00, 5, "fisubr" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Address& addr) { opFpuMem(addr, 0x%02X, 0x%02X, 0x%02X, %d, %d); }\n", p->name, p->m16, p->m32, p->m64, p->ext, p->m64ext); - } - } - { - const struct Tbl { - uint32_t code1; - uint32_t code2; - const char *name; - } tbl[] = { - { 0xD8C0, 0xDCC0, "fadd" }, - { 0x0000, 0xDEC0, "faddp" }, - - { 0xDAC0, 0x00C0, "fcmovb" }, - { 0xDAC8, 0x00C8, "fcmove" }, - { 0xDAD0, 0x00D0, "fcmovbe" }, - { 0xDAD8, 0x00D8, "fcmovu" }, - { 0xDBC0, 0x00C0, "fcmovnb" }, - { 0xDBC8, 0x00C8, "fcmovne" }, - { 0xDBD0, 0x00D0, "fcmovnbe" }, - { 0xDBD8, 0x00D8, "fcmovnu" }, - - { 0xDBF0, 0x00F0, "fcomi" }, - { 0xDFF0, 0x00F0, "fcomip" }, - { 0xDBE8, 0x00E8, "fucomi" }, - { 0xDFE8, 0x00E8, "fucomip" }, - - { 0xD8F0, 0xDCF8, "fdiv" }, - { 0x0000, 0xDEF8, "fdivp" }, - { 0xD8F8, 0xDCF0, "fdivr" }, - { 0x0000, 0xDEF0, "fdivrp" }, - { 0xD8C8, 0xDCC8, "fmul" }, - { 0x0000, 0xDEC8, "fmulp" }, - { 0xD8E0, 0xDCE8, "fsub" }, - { 0x0000, 0xDEE8, "fsubp" }, - { 0xD8E8, 0xDCE0, "fsubr" }, - { 0x0000, 0xDEE0, "fsubrp" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); - // omit st0 version(like nasm) - if (p->code1) { - printf("void %s(const Fpu& reg1) { opFpuFpu(st0, reg1, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); - } else { - printf("void %s(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); - } - } - } - { - const struct Tbl { - uint8_t code1; - uint8_t code2; - const char *name; - } tbl[] = { - { 0xD8, 0xD0, "fcom" }, - { 0xD8, 0xD8, "fcomp" }, - { 0xDD, 0xC0, "ffree" }, - { 0xD9, 0xC0, "fld" }, - { 0xDD, 0xD0, "fst" }, - { 0xDD, 0xD8, "fstp" }, - { 0xDD, 0xE0, "fucom" }, - { 0xDD, 0xE8, "fucomp" }, - { 0xD9, 0xC8, "fxch" }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void %s(const Fpu& reg) { opFpu(reg, 0x%02X, 0x%02X); }\n", p->name, p->code1, p->code2); - } - } - // AVX - { // pd, ps, sd, ss - const struct Tbl { - uint8_t code; - const char *name; - bool only_pd_ps; - } tbl[] = { - { 0x58, "add", false }, - { 0x5C, "sub", false }, - { 0x59, "mul", false }, - { 0x5E, "div", false }, - { 0x5F, "max", false }, - { 0x5D, "min", false }, - { 0x54, "and", true }, - { 0x55, "andn", true }, - { 0x56, "or", true }, - { 0x57, "xor", true }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code); - printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code); - if (p->only_pd_ps) continue; - printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x%02X); }\n", p->name, p->code); - printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x%02X); }\n", p->name, p->code); - } - } - putX_X_XM(false); - - // (x, x/m[, imm]) or (y, y/m[, imm]) - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - bool hasIMM; - int mode; // 1 : SSE, 2 : AVX, 3 : SSE + AVX - } tbl[] = { - { 0x15, "blendvpd", T_0F38 | T_66, false, 1 }, - { 0x14, "blendvps", T_0F38 | T_66, false, 1 }, - { 0x10, "pblendvb", T_0F38 | T_66, false, 1 }, - { 0xDF, "aeskeygenassist", T_0F3A | T_66, true, 3 }, - { 0xDB, "aesimc", T_0F38 | T_66 | T_W0, false, 3 }, - { 0x09, "roundpd", T_0F3A | T_66 | T_YMM, true, 3 }, - { 0x08, "roundps", T_0F3A | T_66 | T_YMM, true, 3 }, - { 0x05, "permilpd", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, 2 }, - { 0x04, "permilps", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 }, - { 0x61, "pcmpestri", T_0F3A | T_66, true, 3 }, - { 0x60, "pcmpestrm", T_0F3A | T_66, true, 3 }, - { 0x63, "pcmpistri", T_0F3A | T_66, true, 3 }, - { 0x62, "pcmpistrm", T_0F3A | T_66, true, 3 }, - { 0x0E, "testps", T_0F38 | T_66 | T_YMM, false, 2 }, - { 0x0F, "testpd", T_0F38 | T_66 | T_YMM, false, 2 }, - { 0x2F, "comisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 }, - { 0x2F, "comiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 }, - { 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 }, - { 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 }, - { 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_SAE_Z, false, 2 }, - { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 }, - { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 }, - { 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_DUP, false, 3 }, - { 0x6F, "movdqa", T_0F | T_66 | T_YMM, false, 2 }, - { 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false, 2 }, - { 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 }, - { 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 }, - { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 }, - { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 }, - - { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 }, - { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 }, - { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, 2 }, - { 0x41, "phminposuw", T_0F38 | T_66, false, 3 }, - - { 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, - { 0x21, "pmovsxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, - { 0x22, "pmovsxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 }, - { 0x23, "pmovsxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, - { 0x24, "pmovsxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, - { 0x25, "pmovsxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 }, - - { 0x30, "pmovzxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, - { 0x31, "pmovzxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, - { 0x32, "pmovzxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 }, - { 0x33, "pmovzxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, - { 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, - { 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 }, - - { 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 }, - { 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX, true, 2 }, - { 0x70, "pshuflw", T_0F | T_F2 | T_YMM | T_EVEX, true, 2 }, - - { 0x17, "ptest", T_0F38 | T_66 | T_YMM, false, 3 }, - { 0x53, "rcpps", T_0F | T_YMM, false, 2 }, - { 0x52, "rsqrtps", T_0F | T_YMM, false, 2 }, - - { 0x51, "sqrtpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_ER_Z | T_B64, false, 2 }, - { 0x51, "sqrtps", T_0F | T_YMM | T_EVEX | T_EW0 | T_ER_Z | T_B32, false, 2 }, - - { 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 }, - { 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 }, - - { 0xCC, "sha1rnds4", T_0F3A, true, 1 }, - { 0xC8, "sha1nexte", T_0F38, false, 1 }, - { 0xC9, "sha1msg1", T_0F38, false, 1 }, - { 0xCA, "sha1msg2", T_0F38, false, 1 }, - { 0xCB, "sha256rnds2", T_0F38, false, 1 }, - { 0xCC, "sha256msg1", T_0F38, false, 1 }, - { 0xCD, "sha256msg2", T_0F38, false, 1 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - if (p->mode & 1) { - const char *immS1 = p->hasIMM ? ", uint8_t imm" : ""; - const char *immS2 = p->hasIMM ? ", imm" : ", NONE"; - const char *pref = p->type & T_66 ? "0x66" : p->type & T_F2 ? "0xF2" : p->type & T_F3 ? "0xF3" : "NONE"; - const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE"; - printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf); - } - if (p->mode & 2) { - printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); - } - } - } - // (m, x), (m, y) - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - } tbl[] = { - { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K }, - { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K }, - { 0x7F, "movdqa", T_0F | T_66 | T_YMM }, - { 0x7F, "movdqu", T_0F | T_F3 | T_YMM }, - { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K }, - { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void v%s(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, %s, 0x%02X); }\n" - , p->name, type.c_str(), p->code); - } - } - // (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m) - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - int mode; // 1 : sse, 2 : avx, 3 : sse + avx - } tbl[] = { - { 0xD0, "addsubpd", T_0F | T_66 | T_YMM, 3 }, - { 0xD0, "addsubps", T_0F | T_F2 | T_YMM, 3 }, - { 0x7C, "haddpd", T_0F | T_66 | T_YMM, 3 }, - { 0x7C, "haddps", T_0F | T_F2 | T_YMM, 3 }, - { 0x7D, "hsubpd", T_0F | T_66 | T_YMM, 3 }, - { 0x7D, "hsubps", T_0F | T_F2 | T_YMM, 3 }, - - { 0xDC, "aesenc", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, - { 0xDD, "aesenclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, - { 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, - { 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - if (p->mode & 1) { - uint8_t pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0; - printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : ""); - } - if (p->mode & 2) { - printf("void v%s(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, %s, 0x%02X); }\n" - , p->name, type.c_str(), p->code); - } - } - } - // vmaskmov - { - const char suf[][8] = { "ps", "pd" }; - for (int i = 0; i < 2; i++) { - printf("void vmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2C + i); - printf("void vmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2E + i); - } - } - // vpmaskmov - { - const char suf[][8] = { "d", "q" }; - for (int i = 0; i < 2; i++) { - printf("void vpmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8C); - printf("void vpmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8E); - } - } - // vpermd, vpermps - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - } tbl[] = { - { 0x36, "vpermd", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 }, - { 0x36, "vpermq", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64 }, - { 0x16, "vpermps", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 }, - { 0x16, "vpermpd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_YMM | T_B64 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); - } - } - // vpermq, vpermpd - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - } tbl[] = { - { 0x00, "vpermq", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 }, - { 0x01, "vpermpd", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); - } - } - // vcmpeqps - { - const char pred[32][16] = { - "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", - "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", - "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", - "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" - }; - const char suf[][4] = { "pd", "ps", "sd", "ss" }; - for (int i = 0; i < 4; i++) { - const char *s = suf[i]; - for (int j = 0; j < 32; j++) { - if (j < 8) { - printf("void cmp%s%s(const Xmm& x, const Operand& op) { cmp%s(x, op, %d); }\n", pred[j], s, s, j); - } - printf("void vcmp%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmp%s(x1, x2, op, %d); }\n", pred[j], s, s, j); - } - } - } - // vmov(h|l)(pd|ps) - { - const struct Tbl { - bool isH; - bool isPd; - uint8_t code; - } tbl[] = { - { true, true, 0x16 }, - { true, false, 0x16 }, - { false, true, 0x12 }, - { false, false, 0x12 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - char c = p.isH ? 'h' : 'l'; - const char *suf = p.isPd ? "pd" : "ps"; - const char *type = p.isPd ? "T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8"; - printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" - , c, suf, type, p.code); - printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n" - , c, suf, type, p.code + 1); - } - } - // FMA - { - const struct Tbl { - uint8_t code; - const char *name; - bool supportYMM; - } tbl[] = { - { 0x08, "vfmadd", true }, - { 0x09, "vfmadd", false }, - { 0x06, "vfmaddsub", true }, - { 0x07, "vfmsubadd", true }, - { 0x0A, "vfmsub", true }, - { 0x0B, "vfmsub", false }, - { 0x0C, "vfnmadd", true }, - { 0x0D, "vfnmadd", false }, - { 0x0E, "vfnmsub", true }, - { 0x0F, "vfnmsub", false }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - for (int j = 0; j < 2; j++) { - const char sufTbl[][2][8] = { - { "pd", "ps" }, - { "sd", "ss" }, - }; - for (int k = 0; k < 3; k++) { - const struct Ord { - const char *str; - uint8_t code; - } ord[] = { - { "132", 0x90 }, - { "213", 0xA0 }, - { "231", 0xB0 }, - }; - int t = T_0F38 | T_66 | T_EVEX; - t |= (j == 0) ? (T_W1 | T_EW1) : (T_W0 | T_EW0); - if (tbl[i].supportYMM) t |= T_YMM; - const std::string suf = sufTbl[tbl[i].supportYMM ? 0 : 1][j]; - if (suf == "pd") { - t |= T_B64; - } else if (suf == "ps") { - t |= T_B32; - } else if (suf == "sd") { - t |= T_ER_X | T_N8; - } else { // ss - t |= T_ER_X | T_N4; - } - std::string type = type2String(t); - printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" - , tbl[i].name, ord[k].str, suf.c_str(), type.c_str(), tbl[i].code + ord[k].code); - } - } - } - } - // FMA others - { - printf("void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }\n"); - printf("void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }\n"); - printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n"); - const struct Tbl { - const char *name; - uint8_t code; - int type; - bool ew1; - } tbl[] = { - { "vbroadcastss", 0x18, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 }, - { "vpbroadcastb", 0x78, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N1 }, - { "vpbroadcastw", 0x79, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N2 }, - { "vpbroadcastd", 0x58, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 }, - { "vpbroadcastq", 0x59, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); - } - - puts("void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); - puts("void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }"); - puts("void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }"); - puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }"); - puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }"); - puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }"); - puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }"); - - puts("void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }"); - puts("void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }"); - puts("void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }"); - puts("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }"); - - puts("void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }"); - puts("void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }"); - puts("void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }"); - puts("void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }"); - - puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }"); - puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }"); - puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }"); - puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }"); - - puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }"); - - } - // (x, x, imm), (x, imm) - { - const struct Tbl { - const char *name; - uint8_t code; - int idx; - int type; - } tbl[] = { - { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, - { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, - { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, - { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, - { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, - { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, - { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, - { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, - { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, - { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); - } - } - // 4-op - { - const struct Tbl { - const char *name; - uint8_t code; - } tbl[] = { - { "vblendvpd", 0x4B }, - { "vblendvps", 0x4A }, - { "vpblendvb", 0x4C }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x%02X, x4.getIdx() << 4); }\n", p.name, p.code); - } - } - // mov - { - printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); - printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n"); - - printf("void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); - printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n"); - printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n"); - - printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n"); - printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n"); - - printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); - printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); - - puts("void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }"); - puts("void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }"); - puts("void vmovntps(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }"); - puts("void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }"); - - // vmovsd, vmovss - for (int i = 0; i < 2; i++) { - char c1 = i == 0 ? 'd' : 's'; - int type = T_0F | T_EVEX; - type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4); - std::string s = type2String(type); - printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); - printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str()); - printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str()); - } - } - // cvt - { - puts("void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D); }"); - puts("void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }"); - puts("void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }"); - puts("void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C); }"); - - puts("void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }"); - puts("void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }"); - - - puts("void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }"); - puts("void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }"); - - puts("void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }"); - puts("void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); - - puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); - - puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }"); - puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }"); - - } - // haswell gpr(reg, reg, r/m) - { - const struct Tbl { - const char *name; - int type; - uint8_t code; - } tbl[] = { - { "andn", T_0F38, 0xF2 }, - { "mulx", T_F2 | T_0F38, 0xF6 }, - { "pdep", T_F2 | T_0F38, 0xF5 }, - { "pext", T_F3 | T_0F38, 0xF5 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, %s, 0x%x, true); }\n", p.name, type2String(p.type).c_str(), p.code); - } - } - // gpr(reg, r/m, reg) - { - const struct Tbl { - const char *name; - int type; - uint8_t code; - } tbl[] = { - { "bextr", T_0F38, 0xF7 }, - { "bzhi", T_0F38, 0xF5 }, - { "sarx", T_0F38 | T_F3, 0xF7 }, - { "shlx", T_0F38 | T_66, 0xF7 }, - { "shrx", T_0F38 | T_F2, 0xF7 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, %s, 0x%x, false); }\n", p.name, type2String(p.type).c_str(), p.code); - } - puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }"); - } - // gpr(reg, r/m) - { - const struct Tbl { - const char *name; - int type; - uint8_t code; - uint8_t idx; - } tbl[] = { - { "blsi", T_0F38, 0xF3, 3 }, - { "blsmsk", T_0F38, 0xF3, 2 }, - { "blsr", T_0F38, 0xF3, 1 }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code); - } - } - // gather - { - const int y_vx_y = 0; - const int y_vy_y = 1; - const int x_vy_x = 2; - const struct Tbl { - const char *name; - uint8_t code; - int w; - int mode; - } tbl[] = { - { "vgatherdpd", 0x92, 1, y_vx_y }, - { "vgatherqpd", 0x93, 1, y_vy_y }, - { "vgatherdps", 0x92, 0, y_vy_y }, - { "vgatherqps", 0x93, 0, x_vy_x }, - { "vpgatherdd", 0x90, 0, y_vy_y }, - { "vpgatherqd", 0x91, 0, x_vy_x }, - { "vpgatherdq", 0x90, 1, y_vx_y }, - { "vpgatherqq", 0x91, 1, y_vy_y }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl& p = tbl[i]; - printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode); - } - } - // vnni - { - const struct Tbl { - uint8_t code; - const char *name; - int type; - } tbl[] = { - { 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, - { 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, - { 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, - { 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code); - } - } -} - -void put32() -{ - put_jREGz("cx", true); - put_jREGz("ecx", false); - - const GenericTbl tbl[] = { - { "aaa", 0x37 }, - { "aad", 0xD5, 0x0A }, - { "aam", 0xD4, 0x0A }, - { "aas", 0x3F }, - { "daa", 0x27 }, - { "das", 0x2F }, - { "into", 0xCE }, - { "popad", 0x61 }, - { "popfd", 0x9D }, - { "pusha", 0x60 }, - { "pushad", 0x60 }, - { "pushfd", 0x9C }, - { "popa", 0x61 }, - }; - putGeneric(tbl, NUM_OF_ARRAY(tbl)); - putLoadSeg("lds", 0xC5, NONE); - putLoadSeg("les", 0xC4, NONE); -} - -void put64() -{ - put_jREGz("ecx", true); - put_jREGz("rcx", false); - - const GenericTbl tbl[] = { - { "cdqe", 0x48, 0x98 }, - { "cqo", 0x48, 0x99 }, - { "cmpsq", 0x48, 0xA7 }, - { "popfq", 0x9D }, - { "pushfq", 0x9C }, - { "lodsq", 0x48, 0xAD }, - { "movsq", 0x48, 0xA5 }, - { "scasq", 0x48, 0xAF }, - { "stosq", 0x48, 0xAB }, - { "syscall", 0x0F, 0x05 }, - { "sysret", 0x0F, 0x07 }, - }; - putGeneric(tbl, NUM_OF_ARRAY(tbl)); - - putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64); - putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64); - puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); - puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); - puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }"); - puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }"); - puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }"); - - puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }"); - puts("void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }"); - puts("void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }"); - puts("void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }"); - - puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }"); - puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }"); -} - -void putAMX_TILE() -{ - puts("void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }"); - puts("void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }"); - puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }"); - puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }"); - puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }"); - puts("void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }"); - puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }"); -} -void putAMX_INT8() -{ - puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }"); - puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }"); - puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }"); - puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }"); -} -void putAMX_BF16() -{ - puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }"); -} - -void putFixed() -{ - puts("#ifdef XBYAK64"); - put64(); - putAMX_TILE(); - putAMX_INT8(); - putAMX_BF16(); - puts("#else"); - put32(); - puts("#endif"); - puts("#ifndef XBYAK_NO_OP_NAMES"); - const char *tbl[] = { - "and", "or", "xor", - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const char *name = tbl[i]; - printf("void %s(const Operand& op1, const Operand& op2) { %s_(op1, op2); }\n", name, name); - printf("void %s(const Operand& op, uint32_t imm) { %s_(op, imm); }\n", name, name); - } - puts("void not(const Operand& op) { not_(op); }"); - puts("#endif"); -} - -void putOmit() -{ - puts("void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); }"); - puts("void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); }"); - puts("void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); }"); - puts("void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); }"); - - puts("void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }"); - puts("void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }"); - { - const char pred[32][16] = { - "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", - "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", - "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", - "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" - }; - const char suf[][4] = { "pd", "ps", "sd", "ss" }; - for (int i = 0; i < 4; i++) { - const char *s = suf[i]; - for (int j = 0; j < 32; j++) { - printf("void vcmp%s%s(const Xmm& x, const Operand& op) { vcmp%s%s(x, x, op); }\n", pred[j], s, pred[j], s); - } - } - } - { - const char *tbl[] = { - "pslldq", - "psrldq", - "psllw", - "pslld", - "psllq", - "psraw", - "psrad", - "psrlw", - "psrld", - "psrlq", - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const char *name = tbl[i]; - printf("void v%s(const Xmm& x, uint8_t imm) { v%s(x, x, imm); }\n", name, name); - } - } - { - const char *tbl[] = { - "vblendvpd", - "vblendvps", - "vpblendvb", - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const char *name = tbl[i]; - printf("void %s(const Xmm& x1, const Operand& op, const Xmm& x4) { %s(x1, x1, op, x4); }\n", name, name); - } - } - putX_X_XM(true); -} - -int main(int argc, char *argv[]) -{ - std::string mode = argc == 2 ? argv[1] : ""; - if (mode == "") { - put(); - } else if (mode == "fixed") { - putFixed(); - } else { - putOmit(); - } -} diff --git a/core/deps/xbyak/gen/sortline.cpp b/core/deps/xbyak/gen/sortline.cpp deleted file mode 100644 index a70ed9feb3..0000000000 --- a/core/deps/xbyak/gen/sortline.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include -#include -#include - -typedef std::set StrSet; - -int main() -{ - StrSet ss; - std::string line; - while (std::getline(std::cin, line)) { - if (!line.empty() && line[line.size() - 1] == '\n') { - line.resize(line.size() - 1); - } - if (!line.empty()) { - ss.insert(line); - } - } - for (StrSet::const_iterator i = ss.begin(), ie = ss.end(); i != ie; ++i) { - std::cout << *i << std::endl; - } -} diff --git a/core/deps/xbyak/gen/update.bat b/core/deps/xbyak/gen/update.bat deleted file mode 100644 index 161ed87e9c..0000000000 --- a/core/deps/xbyak/gen/update.bat +++ /dev/null @@ -1,17 +0,0 @@ -@echo off -set OPT=/EHsc -I../ /W4 -D_CRT_SECURE_NO_WARNINGS -set TARGET=..\\xbyak\\xbyak_mnemonic.h -set SORT=sortline -cl gen_code.cpp %OPT% -gen_code | %SORT% > %TARGET% -echo #ifdef XBYAK_ENABLE_OMITTED_OPERAND>> %TARGET% -gen_code omit | %SORT% >> %TARGET% -echo #endif>>%TARGET% -gen_code fixed >> %TARGET% -cl gen_avx512.cpp %OPT% -echo #ifndef XBYAK_DISABLE_AVX512>> %TARGET% -gen_avx512 | %SORT% >> %TARGET% -echo #ifdef XBYAK64>> %TARGET% -gen_avx512 64 | %SORT% >> %TARGET% -echo #endif>> %TARGET% -echo #endif>> %TARGET% diff --git a/core/deps/xbyak/readme.md b/core/deps/xbyak/readme.md deleted file mode 100644 index a67e8e981e..0000000000 --- a/core/deps/xbyak/readme.md +++ /dev/null @@ -1,617 +0,0 @@ -[![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml) - -# Xbyak 5.992 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ - -## Abstract - -Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic. - -The pronunciation of Xbyak is `kəi-bja-k`. -It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world. - -## Feature -* header file only -* Intel/MASM like syntax -* fully support AVX-512 - -**Note**: -Use `and_()`, `or_()`, ... instead of `and()`, `or()`. -If you want to use them, then specify `-fno-operator-names` option to gcc/clang. - -### News -- vnni instructions such as vpdpbusd supports vex encoding. -- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit. -- (Windows) `#include ` has been removed from xbyak.h, so add it explicitly if you need it. -- support exception-less mode see. [Exception-less mode](#exception-less-mode) -- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined. - -### Supported OS - -* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit) -* Linux(32bit, 64bit) -* Intel macOS - -### Supported Compilers - -Almost C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin. - -## Install - -The following files are necessary. Please add the path to your compile directory. - -* xbyak.h -* xbyak_mnemonic.h -* xbyak_util.h - -Linux: -``` -make install -``` - -These files are copied into `/usr/local/include/xbyak`. - -## How to use it - -Inherit `Xbyak::CodeGenerator` class and make the class method. -``` -#include - -struct Code : Xbyak::CodeGenerator { - Code(int x) - { - mov(eax, x); - ret(); - } -}; -``` -Or you can pass the instance of CodeGenerator without inheriting. -``` -void genCode(Xbyak::CodeGenerator& code, int x) { - using namespace Xbyak::util; - code.mov(eax, x); - code.ret(); -} -``` - -Make an instance of the class and get the function -pointer by calling `getCode()` and call it. -``` -Code c(5); -int (*f)() = c.getCode(); -printf("ret=%d\n", f()); // ret = 5 -``` - -## Syntax -Similar to MASM/NASM syntax with parentheses. - -``` -NASM Xbyak -mov eax, ebx --> mov(eax, ebx); -inc ecx inc(ecx); -ret --> ret(); -``` - -## Addressing -Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory, -otherwise use `ptr`. - -``` -(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement] - [rip + 32bit disp] ; x64 only - -NASM Xbyak -mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]); -mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]); -test byte [esp], 4 --> test(byte [esp], 4); -inc qword [rax] --> inc(qword [rax]); -``` -**Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type. - -### How to use Selector (Segment Register) -``` -mov eax, [fs:eax] --> putSeg(fs); - mov(eax, ptr [eax]); -mov ax, cs --> mov(ax, cs); -``` -**Note**: Segment class is not derived from `Operand`. - -## AVX - -``` -vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3 -vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory -vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3); -``` - -**Note**: -If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility. -But the newer version will not support it. -``` -vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3 -``` - -## AVX-512 - -``` -vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30); -vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]); -vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]); -vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2); -vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2); -vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae); - vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary. -vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5); - -vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]); -vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]); -vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]); -vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]); -vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]); -vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4); - -vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword - vcvtpd2dq(xmm16, ptr [eax+33]); // default xword -vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]); -vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256 -vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast - -vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512 -vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit -vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit - -vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX -vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above -vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding -``` -### Remark -* `k1`, ..., `k7` are opmask registers. - - `k0` is dealt as no mask. - - e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`. -* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively. -* `k4 | k3` is different from `k3 | k4`. -* use `ptr_b` for broadcast `{1toX}`. X is automatically determined. -* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary. - -## Label -Two kinds of Label are supported. (String literal and Label class). - -### String literal -``` -L("L1"); - jmp("L1"); - - jmp("L2"); - ... - a few mnemonics (8-bit displacement jmp) - ... -L("L2"); - - jmp("L3", T_NEAR); - ... - a lot of mnemonics (32-bit displacement jmp) - ... -L("L3"); -``` - -* Call `hasUndefinedLabel()` to verify your code has no undefined label. -* you can use a label for immediate value of mov like as `mov(eax, "L2")`. - -### Support `@@`, `@f`, `@b` like MASM - -``` -L("@@"); // - jmp("@b"); // jmp to - jmp("@f"); // jmp to -L("@@"); // - jmp("@b"); // jmp to - mov(eax, "@b"); - jmp(eax); // jmp to -``` - -### Local label - -Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()` -are treated as a local label. -`inLocalLabel()` and `outLocalLabel()` can be nested. - -``` -void func1() -{ - inLocalLabel(); - L(".lp"); // ; local label - ... - jmp(".lp"); // jmp to - L("aaa"); // global label - outLocalLabel(); - - inLocalLabel(); - L(".lp"); // ; local label - func1(); - jmp(".lp"); // jmp to - inLocalLabel(); - jmp("aaa"); // jmp to -} -``` - -### short and long jump -Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified. -So if the size between jmp and label is larger than 127 byte, then xbyak will cause an error. - -``` -jmp("short-jmp"); // short jmp -// small code -L("short-jmp"); - -jmp("long-jmp"); -// long code -L("long-jmp"); // throw exception -``` -Then specify T_NEAR for jmp. -``` -jmp("long-jmp", T_NEAR); // long jmp -// long code -L("long-jmp"); -``` -Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR. -``` -jmp("long-jmp"); // long jmp -// long code -L("long-jmp"); -``` - -### Label class - -`L()` and `jxx()` support Label class. - -``` - Xbyak::Label label1, label2; -L(label1); - ... - jmp(label1); - ... - jmp(label2); - ... -L(label2); -``` - -Use `putL` for jmp table -``` - Label labelTbl, L0, L1, L2; - mov(rax, labelTbl); - // rdx is an index of jump table - jmp(ptr [rax + rdx * sizeof(void*)]); -L(labelTbl); - putL(L0); - putL(L1); - putL(L2); -L(L0); - .... -L(L1); - .... -``` - -`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel. - -``` - Label label2; - Label label1 = L(); // make label1 ; same to Label label1; L(label1); - ... - jmp(label2); // label2 is not determined here - ... - assignL(label2, label1); // label2 <- label1 -``` -The `jmp` in the above code jumps to label1 assigned by `assignL`. - -**Note**: -* srcLabel must be used in `L()`. -* dstLabel must not be used in `L()`. - -`Label::getAddress()` returns the address specified by the label instance and 0 if not specified. -``` -// not AutoGrow mode -Label label; -assert(label.getAddress() == 0); -L(label); -assert(label.getAddress() == getCurr()); -``` - -### Rip ; relative addressing -``` -Label label; -mov(eax, ptr [rip + label]); // eax = 4 -... - -L(label); -dd(4); -``` -``` -int x; -... - mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB -``` - -## Code size -The default max code size is 4096 bytes. -Specify the size in constructor of `CodeGenerator()` if necessary. - -``` -class Quantize : public Xbyak::CodeGenerator { -public: - Quantize() - : CodeGenerator(8192) - { - } - ... -}; -``` - -## User allocated memory - -You can make jit code on prepared memory. - -Call `setProtectModeRE` yourself to change memory mode if using the prepared memory. - -``` -uint8_t alignas(4096) buf[8192]; // C++11 or later - -struct Code : Xbyak::CodeGenerator { - Code() : Xbyak::CodeGenerator(sizeof(buf), buf) - { - mov(rax, 123); - ret(); - } -}; - -int main() -{ - Code c; - c.setProtectModeRE(); // set memory to Read/Exec - printf("%d\n", c.getCode()()); -} -``` - -**Note**: See [sample/test0.cpp](sample/test0.cpp). - -### AutoGrow - -The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`. - -Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address. -``` -struct Code : Xbyak::CodeGenerator { - Code() - : Xbyak::CodeGenerator(, Xbyak::AutoGrow) - { - ... - } -}; -Code c; -// generate code for jit -c.ready(); // mode = Read/Write/Exec -``` - -**Note**: -* Don't use the address returned by `getCurr()` before calling `ready()` because it may be invalid address. - -### Read/Exec mode -Xbyak set Read/Write/Exec mode to memory to run jit code. -If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and -call `setProtectModeRE()` after generating jit code. - -``` -struct Code : Xbyak::CodeGenerator { - Code() - : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) - { - mov(eax, 123); - ret(); - } -}; - -Code c; -c.setProtectModeRE(); -... - -``` -Call `readyRE()` instead of `ready()` when using `AutoGrow` mode. -See [protect-re.cpp](sample/protect-re.cpp). - -## Exception-less mode -If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`. -In stead of throwing an exception, `Xbyak::GetError()` returns non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong. -The status will not be changed automatically, then you should reset it by `Xbyak::ClearError()`. -`CodeGenerator::reset()` calls `ClearError()`. - -## Macro - -* **XBYAK32** is defined on 32bit. -* **XBYAK64** is defined on 64bit. -* **XBYAK64_WIN** is defined on 64bit Windows(VC). -* **XBYAK64_GCC** is defined on 64bit gcc, cygwin. -* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, .... -* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future). -* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro. -* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`. - -## Sample - -* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64) -* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only) -* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64) -* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64) - -## License - -modified new BSD License -http://opensource.org/licenses/BSD-3-Clause - -## History -* 2021/May/09 ver 5.992 support endbr32 and endbr64 -* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14 -* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito) -* 2020/Oct/17 ver 5.98 support the form of [scale * reg] -* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc. -* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later -* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`. -* 2020/Jul/28 ver 5.94 remove #include (only windows) -* 2020/Jul/21 ver 5.93 support exception-less mode -* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov) -* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64 -* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso) -* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined. -* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask) -* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register -* 2020/Feb/26 ver 5.891 fix typo of type -* 2020/Jan/03 ver 5.89 fix error of vfpclasspd -* 2019/Dec/20 ver 5.88 fix compile error on Windows -* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified. -* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available) -* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later -* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined -* 2019/Oct/12 ver 5.83 exit(1) was removed -* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam) -* 2019/Sep/14 ver 5.81 support some generic mnemonics. -* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov) -* 2019/May/27 support vp2intersectd, vp2intersectq (not tested) -* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps -* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky) -* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage) -* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov -* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel -* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility -* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed -* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed -* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov -* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8) -* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) -* 2018/Sep/04 ver 5.71 L() returns a new label instance -* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting -* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday) -* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm -* 2018/Jul/26 ver 5.661 support mingw64 -* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect() -* 2018/Jun/26 ver 5.65 fix push(qword [mem]) -* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu -* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem) -* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso -* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it) -* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace -* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf) -* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix -* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage) -* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen) -* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan) -* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan) -* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso) -* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio -* 2017/Jul/09 ver 5.431 fix hasRex() (no affect) (thanks to drillsar) -* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed) -* 2017/May/13 ver 5.42 add movs{b,w,d,q} -* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso) -* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label -* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso) -* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N -* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro) -* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW -* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso) -* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso) -* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38) -* 2016/Nov/20 ver 5.10 add addressing [rip+&var] -* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio) -* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h -* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4 -* 2016/Aug/03 ver 5.01 disable omitted operand -* 2016/Jun/24 ver 5.00 support avx-512 instruction set -* 2016/Jun/13 avx-512 add mask instructions -* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu -* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp) -* 2016/Feb/04 ver 4.90 add jcc(const void *addr); -* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell) -* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere) -* 2015/Oct/05 ver 4.87 support segment selectors -* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere) -* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen) -* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff) -* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik) -* 2015/May/24 ver 4.82 support detection of F16C -* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere) -* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere) -* 2015/Jar/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac -* 2014/Oct/14 ver 4.70 support MmapAllocator -* 2014/Jun/13 ver 4.62 disable warning of VC2014 -* 2014/May/30 ver 4.61 support bt, bts, btr, btc -* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph -* 2014/Apr/11 ver 4.52 add detection of rdrand -* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels -* 2014/Mar/16 ver 4.50 support new Label -* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox -* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64() -* 2013/Oct/16 ver 4.21 label support std::string -* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64) -* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class -* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label -* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest). -* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions -* 2013/Mar/27 ver 3.80 support mov(reg, "label"); -* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz() -* 2013/Jan/15 ver 3.75 add setSize() to modify generated code -* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect() -* 2013/Jan/06 ver 3.73 use unordered_map if possible -* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const. -* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined. -* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util. -* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias) -* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit -* 2012/Nov/01 ver 3.61 add fldcw/fstcw -* 2012/May/03 ver 3.60 change interface of Allocator -* 2012/Mar/23 ver 3.51 fix userPtr mode -* 2012/Mar/19 ver 3.50 support AutoGrow mode -* 2011/Nov/09 ver 3.05 fix bit property of rip addresing / support movsxd -* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat) -* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya) -* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc -* 2011/May/24 ver 3.01 fix typo of OSXSAVE -* 2011/May/23 ver 3.00 add vcmpeqps and so on -* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) -* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe -* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm -* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest) -* 2011/Feb/04 ver 2.99 beta support AVX -* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp -* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist -* 2010/Jun/07 ver 2.29 fix call( - jmp("@b"); // jmp to - jmp("@f"); // jmp to - L("@@"); // - jmp("@b"); // jmp to - mov(eax, "@b"); - jmp(eax); // jmp to - -2. ラベルの局所化 - -ピリオドで始まるラベルをinLocalLabel(), outLocalLabel()で挟むことで局所化できます。 -inLocalLabel(), outLocalLabel()は入れ子にすることができます。 - -void func1() -{ - inLocalLabel(); - L(".lp"); // ; ローカルラベル - ... - jmp(".lp"); // jmpt to - L("aaa"); // グローバルラベル - outLocalLabel(); -} - -void func2() -{ - inLocalLabel(); - L(".lp"); // ; ローカルラベル - func1(); - jmp(".lp"); // jmp to - outLocalLabel(); -} - -上記サンプルではinLocalLabel(), outLocalLabel()が無いと、 -".lp"ラベルの二重定義エラーになります。 - -3. 新しいLabelクラスによるジャンプ命令 - -ジャンプ先を文字列による指定だけでなくラベルクラスを使えるようになりました。 - - Label label1, label2; - L(label1); - ... - jmp(label1); - ... - jmp(label2); - ... - L(label2); - -更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。 - - Label label2; - Label label1 = L(); // Label label1; L(label1);と同じ意味 - ... - jmp(label2); - ... - assignL(label2, label1); - -上記jmp命令はlabel1にジャンプします。 - -制限 -* srcLabelはL()により飛び先が確定していないといけません。 -* dstLabelはL()により飛び先が確定していてはいけません。 - -ラベルは`getAddress()`によりそのアドレスを取得できます。 -未定義のときは0が返ります。 -``` -// not AutoGrow mode -Label label; -assert(label.getAddress(), 0); -L(label); -assert(label.getAddress(), getCurr()); -``` - -・Xbyak::CodeGenerator()コンストラクタインタフェース - -@param maxSize [in] コード生成最大サイズ(デフォルト4096byte) -@param userPtr [in] ユーザ指定メモリ - -CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0); - -デフォルトコードサイズは4096(=DEFAULT_MAX_CODE_SIZE)バイトです。 -それより大きなコードを生成する場合はCodeGenerator()のコンストラクタに指定してください。 - -class Quantize : public Xbyak::CodeGenerator { -public: - Quantize() - : CodeGenerator(8192) - { - } - ... -}; - -またユーザ指定メモリをコード生成最大サイズと共に指定すると、CodeGeneratorは -指定されたメモリ上にバイト列を生成します。 - -補助関数として指定されたアドレスの実行属性を変更するCodeArray::protect()と -与えられたポインタからアライメントされたポインタを取得するCodeArray::getAlignedAddress() -も用意しました。詳細はsample/test0.cppのuse memory allocated by userを参考に -してください。 - -/** - change exec permission of memory - @param addr [in] buffer address - @param size [in] buffer size - @param canExec [in] true(enable to exec), false(disable to exec) - @return true(success), false(failure) -*/ -bool CodeArray::protect(const void *addr, size_t size, bool canExec); - -/** - get aligned memory pointer -*/ -uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE); - -・read/execモード -デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。 -コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。 - -struct Code : Xbyak::CodeGenerator { - Code() - : Xbyak::CodeGenerator(4096, Xbyak::DontUseProtect) // JIT領域をread/writeのままコード生成 - { - mov(eax, 123); - ret(); - } -}; - -Code c; -c.setProtectModeRE(); // read/execモードに変更 -// JIT領域を実行 - -AutoGrowの場合はreadyの代わりにreadyRE()を読んでください。 - -struct Code : Xbyak::CodeGenerator { - Code() - : Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成 - { - mov(eax, 123); - ret(); - } -}; - -Code c; -c.readyRE(); // read/exeモードに変更 -// JIT領域を実行 - -setProtectModeRW()を呼ぶと領域が元のread/execモードに戻ります。 - - -その他詳細は各種サンプルを参照してください。 ------------------------------------------------------------------------------ -◎マクロ - -32bit環境上でコンパイルするとXBYAK32が、64bit環境上でコンパイルするとXBYAK64が -定義されます。さらに64bit環境上ではWindows(VC)ならXBYAK64_WIN、cygwin, gcc上では -XBYAK64_GCCが定義されます。 - ------------------------------------------------------------------------------ -◎使用例 - -test0.cpp ; 簡単な例(x86, x64) -quantize.cpp ; 割り算のJITアセンブルによる量子化の高速化(x86) -calc.cpp ; 与えられた多項式をアセンブルして実行(x86, x64) - boost(http://www.boost.org/)が必要 -bf.cpp ; JIT Brainfuck(x86, x64) - ------------------------------------------------------------------------------ -◎ライセンス - -修正された新しいBSDライセンスに従います。 -http://opensource.org/licenses/BSD-3-Clause - -sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から -いただきました。 - ------------------------------------------------------------------------------ -◎履歴 - -2021/05/09 ver 5.992 endbr32とendbr64のサポート -2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制 -2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito) -2020/10/17 ver 5.98 [scale * reg]のサポート -2020/09/08 ver 5.97 uint32などをuint32_tに置換 -2020/08/28 ver 5.95 レジスタクラスのコンストラクタがconstexprに対応(C++14以降) -2020/08/04 ver 5.941 `CodeGenerator::reset()`が`ClearError()`を呼ぶように変更 -2020/07/28 ver 5.94 #include の削除 (only windows) -2020/07/21 ver 5.93 例外なしモード追加 -2020/06/30 ver 5.92 Intel AMX命令サポート (Thanks to nshustrov) -2020/06/19 ver 5.913 32ビット環境でXBYAK64を定義したときのmov(r64, imm64)を修正 -2020/06/19 ver 5.912 macOSの古いXcodeでもMAP_JITを有効にする(Thanks to rsdubtso) -2020/05/10 ver 5.911 Linux/macOSでXBYAK_USE_MMAP_ALLOCATORがデフォルト有効になる -2020/04/20 ver 5.91 マスクレジスタk0を受け入れる(マスクをしない) -2020/04/09 ver 5.90 kmov{b,w,d,q}がサポートされないレジスタを受けると例外を投げる -2020/02/26 ver 5.891 zm0のtype修正 -2020/01/03 ver 5.89 vfpclasspdの処理エラー修正 -2019/12/20 ver 5.88 Windowsでのコンパイルエラー修正 -2019/12/19 ver 5.87 未定義ラベルへのjmp命令のデフォルト挙動をT_NEARにするsetDefaultJmpNEAR()を追加 -2019/12/13 ver 5.86 [変更] -fno-operator-namesが指定されたときは5.84以前の挙動に戻す -2019/12/07 ver 5.85 mmapにMAP_JITフラグを追加(macOS mojave以上) -2019/11/29 ver 5.84 [変更] XBYAK_USE_OP_NAMESが定義されていない限りXBYAK_NO_OP_NAMESが定義されるように変更 -2019/10/12 ver 5.83 exit(1)の除去 -2019/09/23 ver 5.82 monitorx, mwaitx, clzero対応 (thanks to MagurosanTeam) -2019/09/14 ver 5.81 いくつかの一般命令をサポート -2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov) -2019/05/27 support vp2intersectd, vp2intersectq (not tested) -2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps -2019/04/27 ver 5.79 vcmppd/vcmppsのptr_b対応忘れ(thanks to jkopinsky) -2019/04/15 ver 5.78 Reg::changeBit()のリファクタリング(thanks to MerryMage) -2019/03/06 ver 5.77 LLCキャッシュを共有数CPU数の修整(by densamoilov) -2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel) -2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元 -2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す -2018/10/21 ver 5.74 RegRip +/intの形をサポート Xbyak::CastToを削除 -2018/10/15 util::StackFrameでmovの代わりにpush/popを使う -2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整 -2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) -2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加 -2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加 -2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday) -2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm -2018/07/26 ver 5.661 mingw64対応 -2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加 -2018/06/26 ver 5.65 fix push(qword [mem]) -2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正 -2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem) -2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso -2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた) -2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加 -2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf) -2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加 -2017/08/18 ver 5.52 align修正(thanks to MerryMage) -2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen) -2017/08/08 ver 5.50 mpx追加(thanks to magurosan) -2017/08/08 ver 5.45 sha追加(thanks to magurosan) -2017/08/08 ver 5.44 prefetchw追加(thanks to rsdubtso) -2017/07/12 ver 5.432 PVS-studioの警告を減らす -2017/07/09 ver 5.431 hasRex()修正 (影響なし) (thanks to drillsar) -2017/05/14 ver 5.43 CodeGenerator::resetSize()修正(thanks to gibbed) -2017/05/13 ver 5.42 movs{b,w,d,q}追加 -2017/01/26 ver 5.41 prefetcwt1追加とscale == 0対応(thanks to rsdubtso) -2016/12/14 ver 5.40 Labelが示すアドレスを取得するLabel::getAddress()追加 -2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso) -2016/12/06 ver 5.33 disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正 -2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro) -2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更 -2016/11/27 ver 5.30 AVX512_4VNNI, AVX512_4FMAPS命令の追加(thanks to rsdubtso) -2016/11/26 ver 5.20 AVX512_4VNNIとAVX512_4FMAPSの判定追加(thanks to rsdubtso) -2016/11/20 ver 5.11 何故か消えていたvptest for ymm追加(thanks to gregory38) -2016/11/20 ver 5.10 [rip+&var]の形のアドレッシング追加 -2016/09/29 ver 5.03 ERR_INVALID_OPMASK_WITH_MEMORYの判定ミス修正(thanks to PVS-Studio) -2016/08/15 ver 5.02 xbyak_bin2hex.hをincludeしない -2016/08/15 ver 5.011 gcc 5.4のバージョン取得ミスの修正 -2016/08/03 ver 5.01 AVXの省略表記非サポート -2016/07/24 ver 5.00 avx-512フルサポート -2016/06/13 avx-512 opmask命令サポート -2016/05/05 ver 4.91 AVX-512命令の検出サポート -2016/03/14 ver 4.901 ready()関数にコメント加筆(thanks to skmp) -2016/02/04 ver 4.90 条件分岐命令にjcc(const void *addr);のタイプを追加 -2016/01/30 ver 4.89 vpblendvbがymmレジスタをサポートしていなかった(thanks to John Funnell) -2016/01/24 ver 4.88 lea, cmovの16bitレジスタ対応(thanks to whyisthisfieldhere) -2015/08/16 ver 4.87 セグメントセレクタに対応 -2015/08/16 ver 4.86 [rip + label]アドレッシングで即値を使うと壊れる(thanks to whyisthisfieldhere) -2015/08/10 ver 4.85 Address::operator==()が間違っている(thanks to inolen) -2015/07/22 ver 4.84 call()がvariadic template対応 -2015/05/24 ver 4.83 mobveサポート(thanks to benvanik) -2015/05/24 ver 4.82 F16Cが使えるかどうかの判定追加 -2015/04/25 ver 4.81 setSizeが例外を投げる条件を修正(thanks to whyisthisfieldhere) -2015/04/22 ver 4.80 rip相対でLabelのサポート(thanks to whyisthisfieldhere) -2015/01/28 ver 4.71 adcx, adox, cmpxchg, rdseed, stacのサポート -2014/10/14 ver 4.70 MmapAllocatorのサポート -2014/06/13 ver 4.62 VC2014で警告抑制 -2014/05/30 ver 4.61 bt, bts, btr, btcのサポート -2014/05/28 ver 4.60 vcvtph2ps, vcvtps2phのサポート -2014/04/11 ver 4.52 rdrandの判定追加 -2014/03/25 ver 4.51 参照されなくなったラベルの状態を削除する -2014/03/16 ver 4.50 新しいラベルクラスのサポート -2014/03/05 ver 4.40 VirtualBox上でBMI/enhanced repのサポート判定を間違うことがあるのを修正 -2013/12/03 ver 4.30 Reg::cvt8(), cvt16(), cvt32()のサポート -2013/10/16 ver 4.21 ラベルでstd::stringを受け付ける。 -2013/07/30 ver 4.20 [break backward compatibility] 従来のReg32eクラスをアドレッシング用のRegExpとReg32, Reg64を表すReg32eに分離 -2013/07/04 ver 4.10 [break backward compatibility] Xbyak::Errorの型をenumからclassに変更 -2013/06/21 ver 4.02 LABELの指すアドレスを書き込むputL(LABEL)関数の追加。 -2013/06/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm) - support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest) -2013/05/30 ver 4.00 AVX2, VEX-encoded GPR-instructionをサポート -2013/03/27 ver 3.80 mov(reg, "label");をサポート -2013/03/13 ver 3.76 cqo, jcxz, jecxz, jrcxz追加 -2013/01/15 ver 3.75 生成されたコードを修正するためにsetSize()を追加 -2013/01/12 ver 3.74 CodeGenerator::reset()とAllocator::useProtect()を追加 -2013/01/06 ver 3.73 可能ならunordered_mapを使う -2012/12/04 ver 3.72 eaxなどをCodeGeneratorのメンバ変数に戻す. Xbyak::util::eaxはstatic const変数 -2012/11/17 ver 3.71 and_(), or_(), xor_(), not_()をXBYAK_NO_OP_NAMESが定義されていないときでも使えるようにした -2012/11/17 CodeGeneratorのeax, ecx, ptrなどのメンバ変数をstaticにし、const参照をXbyak::utilにも定義 -2012/11/09 ver 3.70 and()をand_()にするためのマクロXBYAK_NO_OP_NAMESを追加(thanks to Mattias) -2012/11/01 ver 3.62 add fwait/fnwait/finit/fninit -2012/11/01 ver 3.61 add fldcw/fstcw -2012/05/03 ver 3.60 Allocatorクラスのインタフェースを変更 -2012/03/23 ver 3.51 userPtrモードがバグったのを修正 -2012/03/19 ver 3.50 AutoGrowモードサポート -2011/11/09 ver 3.05 rip相対の64bitサイズ以外の扱いのバグ修正 / movsxdサポート -2011/08/15 ver 3.04 add(dword [ebp-8], 0xda);などにおけるimm8の扱いのバグ修正(thanks to lolcat) -2011/06/16 ver 3.03 Macのgcc上での__GNUC_PREREQがミスってたのを修正(thanks to t_teruya) -2011/04/28 ver 3.02 Macのgcc上ではxgetbvをdisable -2011/03/24 ver 3.01 fix typo of OSXSAVE -2011/03/23 ver 3.00 vcmpeqpsなどを追加 -2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) -2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe -2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm -2011/02/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest) -2011/02/04 ver 2.99 beta support AVX -2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rtdscp -2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist -2010/07/07 ver 2.29 fix call(