diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 336330bd..892fdbcb 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -2042,6 +2042,9 @@ void put64() } } } + // encodekey + puts("void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }"); + puts("void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }"); } void putAMX_TILE() diff --git a/test/apx.cpp b/test/apx.cpp index 0b99e20c..b2675b87 100644 --- a/test/apx.cpp +++ b/test/apx.cpp @@ -1753,6 +1753,7 @@ CYBOZU_TEST_AUTO(kmov) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } + CYBOZU_TEST_AUTO(amx) { struct Code : Xbyak::CodeGenerator { @@ -1835,3 +1836,37 @@ CYBOZU_TEST_AUTO(aeskl) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } + +CYBOZU_TEST_AUTO(encodekey) /* Compares the bytes generated for encodekey128/encodekey256 against tbl: the (eax, ebx) pair expects the 5-byte F3 0F 38 form, every pair using r8d/r29d/r30d expects a 6-byte form starting with 0x62. */ +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + encodekey128(eax, ebx); + encodekey128(eax, r8d); + encodekey128(r8d, ebx); + encodekey128(r30d, r29d); + + encodekey256(eax, ebx); + encodekey256(eax, r8d); + encodekey256(r8d, ebx); + encodekey256(r30d, r29d); + } + } c; + const uint8_t tbl[] = { + // encodekey128 + 0xf3, 0x0f, 0x38, 0xfa, 0xc3, + 0x62, 0xd4, 0x7e, 0x08, 0xda, 0xc0, + 0x62, 0x74, 0x7e, 0x08, 0xda, 0xc3, + 0x62, 0x4c, 0x7e, 0x08, 0xda, 0xf5, + // encodekey256 + 0xf3, 0x0f, 0x38, 0xfb, 0xc3, + 0x62, 0xd4, 0x7e, 0x08, 0xdb, 0xc0, + 0x62, 0x74, 0x7e, 0x08, 0xdb, 0xc3, + 0x62, 0x4c, 0x7e, 0x08, 0xdb, 0xf5, + }; + const size_t n = sizeof(tbl); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index ef40b993..df003896 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -231,6 +231,7 @@ enum { ERR_INVALID_ZU, ERR_CANT_USE_REX2, ERR_INVALID_DFV, + ERR_INVALID_REG_IDX, ERR_INTERNAL // Put it at last. 
}; @@ -288,6 +289,7 @@ inline const char *ConvertErrorToString(int err) "invalid ZU", "can't use rex2", "invalid dfv", + "invalid reg index", "internal error" }; assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl)); @@ -2738,13 +2740,21 @@ class CodeGenerator : public CodeArray { } void opAESKL(const Xmm *x, const Address& addr, uint64_t type1, uint64_t type2, uint8_t code) { - if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_BAD_COMBINATION) + if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_INVALID_REG_IDX) /* An xmm index of 16 or more is now reported as ERR_INVALID_REG_IDX rather than ERR_BAD_COMBINATION. NOTE(review): x is null-checked here but dereferenced unconditionally below; the callers visible in this patch pass &x, &xmm0 or &xmm2 - confirm no caller passes null. */ if (addr.hasRex2()) { opROO(Reg(), addr, *x, type2, code); return; } opRO(*x, addr, type1, code); } + void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2) /* Shared emitter behind encodekey128/encodekey256. code1 is the opcode byte written after F3 0F 38 on the short path; code2 is the opcode handed to opROO on the other path. */ + { + if (r1.getIdx() < 8 && r2.getIdx() < 8) { /* Both register indices are below 8: write the bytes directly. No REX prefix is written on this path, which is why it is limited to indices 0-7. */ + db(0xF3); db(0x0F); db(0x38); db(code1); setModRM(3, r1.getIdx(), r2.getIdx()); /* The encodekey test expects ModRM 0xC3 for (eax, ebx). */ + return; + } + opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2); /* At least one index is 8 or more: delegate to opROO with T_MUST_EVEX|T_F3; note that r2 is passed before r1. */ + } public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index a904180e..b08ce7d0 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1934,6 +1934,8 @@ void aesenc128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0 void aesenc256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDE); } void aesencwide128kl(const Address& addr) { opAESKL(&xmm0, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } void aesencwide256kl(const Address& addr) { opAESKL(&xmm2, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } +void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); } +void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); } void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); } void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, 
&tmm0, addr, T_66|T_0F38 | T_W0, 0x49); } void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }