diff -Nru xbyak-7.02/CMakeLists.txt xbyak-7.05/CMakeLists.txt --- xbyak-7.02/CMakeLists.txt 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/CMakeLists.txt 2024-01-03 11:13:13.000000000 +0000 @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5) -project(xbyak LANGUAGES CXX VERSION 7.02) +project(xbyak LANGUAGES CXX VERSION 7.05) file(GLOB headers xbyak/*.h) diff -Nru xbyak-7.02/debian/changelog xbyak-7.05/debian/changelog --- xbyak-7.02/debian/changelog 2023-12-24 12:13:15.000000000 +0000 +++ xbyak-7.05/debian/changelog 2024-01-12 09:01:26.000000000 +0000 @@ -1,3 +1,9 @@ +xbyak (7.05-1) unstable; urgency=medium + + * New upstream version 7.05 + + -- Andrea Pappacoda Fri, 12 Jan 2024 10:01:26 +0100 + xbyak (7.02-1) unstable; urgency=medium * New upstream version 7.02 diff -Nru xbyak-7.02/doc/changelog.md xbyak-7.05/doc/changelog.md --- xbyak-7.02/doc/changelog.md 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/doc/changelog.md 2024-01-03 11:13:13.000000000 +0000 @@ -1,5 +1,8 @@ # History +* 2024/Jan/03 ver 7.05 support RAO-INT for APX +* 2023/Dec/28 ver 7.04 rex2 supports two-byte opecode +* 2023/Dec/26 ver 7.03 set the default value of dfv to 0 * 2023/Dec/20 ver 7.02 SHA* support APX * 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of APX10/APX * 2023/Dec/01 ver 7.00 support APX diff -Nru xbyak-7.02/doc/usage.md xbyak-7.05/doc/usage.md --- xbyak-7.02/doc/usage.md 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/doc/usage.md 2024-01-03 11:13:13.000000000 +0000 @@ -145,6 +145,16 @@ - `imul(ax|T_zu, cx, 0x1234);` // Set ND=ZU - `imul(ax|T_zu|T_nf, cx, 0x1234);` // Set ND=ZU and EVEX.NF=1 - `setb(r31b|T_zu);` // same as set(r31b); movzx(r31, r31b); + - See [sample/zero_upper.cpp](../sample/zero_upper.cpp) + +### ccmpSCC and ctestSCC + +- ccmpSCC(op1, op2, dfv = 0); // eflags = eflags == SCC ? cmp(op1, op2) : dfv +- ctestSCC(op1, op2, dfv = 0); // eflags = eflags == SCC ? 
test(op1, op2) : dfv +- SCC means source condition code such as z, a, gt. +- See [sample/ccmp.cpp](../sample/ccmp.cpp) +- Specify the union of T_of(=8), T_sf(=4), T_zf(=2), or T_cf(=1) for dfv. + ## Label Two kinds of Label are supported. (String literal and Label class). diff -Nru xbyak-7.02/gen/avx_type.hpp xbyak-7.05/gen/avx_type.hpp --- xbyak-7.02/gen/avx_type.hpp 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/gen/avx_type.hpp 2024-01-03 11:13:13.000000000 +0000 @@ -64,10 +64,8 @@ if (type & T_MEM_EVEX) str += "|T_MEM_EVEX"; if (type & T_NF) str += "|T_NF"; if (type & T_CODE1_IF1) str += "|T_CODE1_IF1"; - if (type & T_MAP3) str += "|T_MAP3"; if (type & T_ND1) str += "|T_ND1"; if (type & T_ZU) str += "|T_ZU"; - if (type & T_MAP1) str += "|T_MAP1"; if (str[0] == '|') str = str.substr(1); return str; diff -Nru xbyak-7.02/gen/avx_type_def.h xbyak-7.05/gen/avx_type_def.h --- xbyak-7.02/gen/avx_type_def.h 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/gen/avx_type_def.h 2024-01-03 11:13:13.000000000 +0000 @@ -43,13 +43,10 @@ static const uint64_t T_MAP6 = T_FP16 | T_0F38; static const uint64_t T_NF = 1ull << 32; // T_nf static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8) - static const uint64_t T_MAP3 = 1ull << 34; // rorx only + static const uint64_t T_ND1 = 1ull << 35; // ND=1 static const uint64_t T_ZU = 1ull << 36; // ND=ZU static const uint64_t T_F2 = 1ull << 37; // pp = 3 - static const uint64_t T_MAP1 = 1ull << 38; // kmov // T_66 = 1, T_F3 = 2, T_F2 = 3 static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; } - static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 
3 : 0; } - // @@@end of avx_type_def.h diff -Nru xbyak-7.02/gen/gen_code.cpp xbyak-7.05/gen/gen_code.cpp --- xbyak-7.02/gen/gen_code.cpp 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/gen/gen_code.cpp 2024-01-03 11:13:13.000000000 +0000 @@ -630,15 +630,15 @@ printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg); printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); - printf("void set%s(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | %d)) return; opRext(op, 8, 0, T_0F, 0x90 | %d); }%s\n", p->name, p->ext, p->ext, msg); + printf("void set%s(const Operand& op) { opSetCC(op, %d); }%s\n", p->name, p->ext, msg); // ccmpscc // true if SCC = 0b1010, false if SCC = 0b1011 (see APX Architecture Specification p.266) const char *s = p->ext == 10 ? "t" : p->ext == 11 ? 
"f" : p->name; - printf("void ccmp%s(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, %d); }\n", s, p->ext); - printf("void ccmp%s(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, %d); }\n", s, p->ext); - printf("void ctest%s(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, %d); }\n", s, p->ext); - printf("void ctest%s(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, %d); }\n", s, p->ext); + printf("void ccmp%s(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, %d); }\n", s, p->ext); + printf("void ccmp%s(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, %d); }\n", s, p->ext); + printf("void ctest%s(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, %d); }\n", s, p->ext); + printf("void ctest%s(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, %d); }\n", s, p->ext); } } { @@ -860,14 +860,13 @@ const char *prefix; } tbl[] = { { "aadd", "" }, - { "aand", " | T_66" }, - { "aor", " | T_F2" }, - { "axor", " | T_F3" }, + { "aand", "|T_66" }, + { "aor", "|T_F2" }, + { "axor", "|T_F3" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Address& addr, const Reg32e ®) { ", p->name); - printf("opMR(addr, reg, T_0F38%s, 0x0FC); }\n", p->prefix); + printf("void %s(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38%s, 0x0FC, T_APX%s); }\n", p->name, p->prefix, p->prefix); } } @@ -1144,16 +1143,15 @@ // misc { puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opMR(addr, reg, 0, 0x8D); }"); - puts("void bswap(const Reg32e& reg) { opRR(Reg32(1), reg, 0, 0x0F); }"); puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }"); puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }"); puts("void xadd(const Operand& op, const Reg& reg) { opRO(reg, 
op, T_0F, 0xC0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }"); puts("void cmpxchg(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }"); - puts("void movbe(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg, T_APX, 0x60)) return; opMR(addr, reg, T_0F38, 0xF0); }"); - puts("void movbe(const Address& addr, const Reg& reg) { if (opROO(Reg(), addr, reg, T_APX, 0x61)) return; opMR(addr, reg, T_0F38, 0xF1); }"); - puts("void movdiri(const Address& addr, const Reg32e& reg) { if (opROO(Reg(), addr, reg, T_APX, 0xF9)) return; opMR(addr, reg, T_0F38, 0xF9); }"); - puts("void movdir64b(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg.cvt32(), T_APX|T_66, 0xF8)) return; opMR(addr, reg.cvt32(), T_66 | T_0F38, 0xF8); }"); + puts("void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); }"); + puts("void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); }"); + puts("void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); }"); + puts("void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); }"); puts("void cmpxchg8b(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xC7); }"); puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }"); @@ -1831,7 +1829,7 @@ const Tbl& p = tbl[i]; printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, %s, 0x%x); }\n", p.name, type2String(p.type).c_str(), p.code); } - puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX|T_MAP3, 0xF0, imm); }"); + puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX, 0xF0, imm); }"); } // gpr(reg, r/m) { diff -Nru 
xbyak-7.02/meson.build xbyak-7.05/meson.build --- xbyak-7.02/meson.build 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/meson.build 2024-01-03 11:13:13.000000000 +0000 @@ -5,7 +5,7 @@ project( 'xbyak', 'cpp', - version: '7.02', + version: '7.05', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' ) diff -Nru xbyak-7.02/readme.md xbyak-7.05/readme.md --- xbyak-7.02/readme.md 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/readme.md 2024-01-03 11:13:13.000000000 +0000 @@ -1,5 +1,5 @@ -# Xbyak 7.02 [![Badge Build]][Build Status] +# Xbyak 7.05 [![Badge Build]][Build Status] *A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)* @@ -33,6 +33,7 @@ ### News +- support RAO-INT for APX - support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE - support APX except for a few instructions - add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma diff -Nru xbyak-7.02/readme.txt xbyak-7.05/readme.txt --- xbyak-7.02/readme.txt 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/readme.txt 2024-01-03 11:13:13.000000000 +0000 @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.02 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.05 ----------------------------------------------------------------------------- ◎概要 @@ -404,6 +404,9 @@ ----------------------------------------------------------------------------- ◎履歴 +2024/01/03 ver 7.05 APX対応RAO-INT +2023/12/28 ver 7.04 2バイトオペコードのrex2対応 +2023/12/26 ver 7.03 dfvのデフォルト値を0に設定 2023/12/20 ver 7.02 SHA*のAPX対応 2023/12/19 ver 7.01 AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE対応 APX10/APX判定対応 2023/12/01 ver 7.00 APX対応 diff -Nru xbyak-7.02/sample/Makefile xbyak-7.05/sample/Makefile --- xbyak-7.02/sample/Makefile 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/sample/Makefile 2024-01-03 11:13:13.000000000 +0000 @@ -30,7 +30,7 @@ endif ifeq ($(BIT),64) -TARGET += test64 bf64 memfunc64 test_util64 jmp_table64 +TARGET += test64 bf64 memfunc64 test_util64 jmp_table64 zero_upper ccmp no_flags ifeq 
($(BOOST_EXIST),1) TARGET += calc64 #calc2_64 endif @@ -103,6 +103,18 @@ $(CXX) $(CFLAGS) profiler.cpp -o $@ profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h $(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl +zero_upper: zero_upper.cpp $(XBYAK_INC) + $(CXX) $(CFLAGS) zero_upper.cpp -o $@ +test_zero_upper: zero_upper + sde -future -- ./zero_upper +ccmp: ccmp.cpp $(XBYAK_INC) + $(CXX) $(CFLAGS) ccmp.cpp -o $@ +test_ccmp: ccmp + sde -future -- ./ccmp +no_flags: no_flags.cpp $(XBYAK_INC) + $(CXX) $(CFLAGS) no_flags.cpp -o $@ +test_no_flags: no_flags + sde -future -- ./no_flags clean: rm -rf $(TARGET) profiler profiler-vtune diff -Nru xbyak-7.02/sample/ccmp.cpp xbyak-7.05/sample/ccmp.cpp --- xbyak-7.02/sample/ccmp.cpp 1970-01-01 00:00:00.000000000 +0000 +++ xbyak-7.05/sample/ccmp.cpp 2024-01-03 11:13:13.000000000 +0000 @@ -0,0 +1,68 @@ +/* + An example of ccmp + > g++ ccmp.cpp -I ../xbyak + > sde -future -- ./a.out +*/ +#include +#include +#include + +using namespace Xbyak; + +struct Code1 : Xbyak::CodeGenerator { + Code1() + { + Xbyak::util::StackFrame sf(this, 2); + const auto& p1 = sf.p[0]; + const auto& p2 = sf.p[1]; + int dfv = 0; + cmp(p1, 3); + ctesta(p2, 1, dfv); // eflags = (p1 > 3) ? ((p2 & 1) == 0) : dfv; + setz(al|T_zu); + } +}; + +struct Code2 : Xbyak::CodeGenerator { + Code2() + { + Xbyak::util::StackFrame sf(this, 3); + const auto& p1 = sf.p[0]; + const auto& p2 = sf.p[1]; + const auto& p3 = sf.p[2]; + int dfv = 0; + cmp(p1, 1); + ccmpe(p2, 2, dfv); // eflags = p1==1 ? p2==2 : dfv; + ccmpe(p3, 3, dfv); // eflags = (p1==1 && p2==2) ? 
p3==3 : dfv; + setz(al|T_zu); // p1==1 && p2==2 && p3==3 + } +}; + + +int main() + try +{ + { + puts("(p1 > 3) && ((p2 & 1) == 0)"); + Code1 c; + auto f = c.getCode(); + for (int p1 = 2; p1 < 5; p1++) { + for (int p2 = 0; p2 < 3; p2++) { + printf("p1=%d p2=%d ret=%d (%d)\n", p1, p2, f(p1, p2), p1 > 3 && ((p2&1) == 0)); + } + } + } + { + puts("p1 == 1 && p2 == 2 && p3 == 3"); + Code2 c; + auto f = c.getCode(); + for (int p1 = 0; p1 < 3; p1++) { + for (int p2 = 1; p2 < 4; p2++) { + for (int p3 = 2; p3 < 5; p3++) { + printf("p1=%d p2=%d p3=%d ret=%d (%d)\n", p1, p2, p3, f(p1, p2, p3), p1==1 && p2==2 && p3==3); + } + } + } + } +} catch (std::exception& e) { + printf("ERR %s\n", e.what()); +} diff -Nru xbyak-7.02/sample/no_flags.cpp xbyak-7.05/sample/no_flags.cpp --- xbyak-7.02/sample/no_flags.cpp 1970-01-01 00:00:00.000000000 +0000 +++ xbyak-7.05/sample/no_flags.cpp 2024-01-03 11:13:13.000000000 +0000 @@ -0,0 +1,25 @@ +#include +#include + +struct Code : Xbyak::CodeGenerator { + Code(bool nf) { + xor_(eax, eax); // CF = 0 + mov(eax, -1); + if (nf) { + puts("no flags (with T_nf)"); + add(eax|T_nf, eax, 1); // does not change CF + } else { + puts("change flags (without T_nf)"); + add(eax, eax, 1); // CF = 1 + } + adc(eax, 0); // eax = CF ? 
1 : 0 + ret(); + } +}; + +int main() { + for (int i = 0; i < 2; i++) { + Code c(i); + printf("i=%d ret=%d\n", i, c.getCode()()); + } +} diff -Nru xbyak-7.02/sample/zero_upper.cpp xbyak-7.05/sample/zero_upper.cpp --- xbyak-7.02/sample/zero_upper.cpp 1970-01-01 00:00:00.000000000 +0000 +++ xbyak-7.05/sample/zero_upper.cpp 2024-01-03 11:13:13.000000000 +0000 @@ -0,0 +1,48 @@ +/* + An example of T_zu (zero upper) flag + > g++ zero_upper.cpp -I ../xbyak + > sde -future -- ./a.out +*/ +#include +#include + +using namespace Xbyak; + +struct Code : Xbyak::CodeGenerator { + Code(int mode) + { + mov(eax, 0x12345678); + cmp(eax, eax); // ZF=1 + switch (mode) { + case 0: // imul + puts("imul"); + imul(ax,ax, 0x1234); + break; + case 1: // imul+zu + puts("imul+zu"); + imul(ax|T_zu, ax, 0x1234); + break; + case 2: // setz + puts("setz"); + setz(al); + break; + case 3: // setz+zu + puts("setz+zu"); + setz(al|T_zu); + break; + } + ret(); + } +}; + +int main() + try +{ + for (int mode = 0; mode < 4; mode++) { + Code c(mode); + auto f = c.getCode(); + printf("ret=%08x\n", f()); + } +} catch (std::exception& e) { + printf("ERR %s\n", e.what()); +} diff -Nru xbyak-7.02/test/apx.cpp xbyak-7.05/test/apx.cpp --- xbyak-7.02/test/apx.cpp 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/test/apx.cpp 2024-01-03 11:13:13.000000000 +0000 @@ -1074,6 +1074,13 @@ setb(r31b|T_zu); setb(r15b|T_zu); setb(ptr [r30]); + + bswap(eax); + bswap(r8d); + bswap(r16d); + bswap(rcx); + bswap(r9); + bswap(r17); } } c; const uint8_t tbl[] = { @@ -1101,7 +1108,13 @@ 0x62, 0xdc, 0x7f, 0x18, 0x42, 0xc7, 0x62, 0xd4, 0x7f, 0x18, 0x42, 0xc7, 0x62, 0xdc, 0x7f, 0x08, 0x42, 0x06, - + // bswap + 0x0f, 0xc8, + 0x41, 0x0f, 0xc8, + 0xd5, 0x90, 0xc8, + 0x48, 0x0f, 0xc9, + 0x49, 0x0f, 0xc9, + 0xd5, 0x98, 0xc9, }; const size_t n = sizeof(tbl); CYBOZU_TEST_EQUAL(c.getSize(), n); @@ -1895,6 +1908,57 @@ }; const size_t n = sizeof(tbl); CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + 
+CYBOZU_TEST_AUTO(0x0f_rex2) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + addps(xmm3, ptr [r30+r20*4+0x4]); + movups(xmm5, ptr [r16]); + movq(r31, xmm5); + cvtsd2si(r20, ptr [r30]); + bsr(r20, r30); + } + } c; + const uint8_t tbl[] = { + 0xd5, 0xb1, 0x58, 0x5c, 0xa6, 0x04, + 0xd5, 0x90, 0x10, 0x28, 0x66, + 0xd5, 0x99, 0x7e, 0xef, 0xf2, + 0xd5, 0xd9, 0x2d, 0x26, + 0xd5, 0xd9, 0xbd, 0xe6, + + }; + const size_t n = sizeof(tbl); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + +CYBOZU_TEST_AUTO(rao_int) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + aadd(ptr [r16+r31*1], r17d); + aadd(ptr [r16+r31*1], r17); + aand(ptr [r16+r31*1], r17d); + aand(ptr [r16+r31*1], r17); + aor(ptr [r16+r31*1], r17d); + aor(ptr [r16+r31*1], r17); + axor(ptr [r16+r31*1], r17d); + axor(ptr [r16+r31*1], r17); + } + } c; + const uint8_t tbl[] = { + 0x62, 0xac, 0x78, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0xf8, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, + 0x79, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0xf9, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0x7b, 0x08, + 0xfc, 0x0c, 0x38, 0x62, 0xac, 0xfb, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0x7a, 0x08, 0xfc, 0x0c, + 0x38, 0x62, 0xac, 0xfa, 0x08, 0xfc, 0x0c, 0x38, + }; + const size_t n = sizeof(tbl); + CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } diff -Nru xbyak-7.02/xbyak/xbyak.h xbyak-7.05/xbyak/xbyak.h --- xbyak-7.02/xbyak/xbyak.h 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/xbyak/xbyak.h 2024-01-03 11:13:13.000000000 +0000 @@ -155,7 +155,7 @@ enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x7020 /* 0xABCD = A.BC(.D) */ + VERSION = 0x7050 /* 0xABCD = A.BC(.D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -727,6 +727,7 @@ bool operator==(const Operand& rhs) const; bool operator!=(const Operand& rhs) const { return !operator==(rhs); } const Address& getAddress() const; + Address getAddress(int immSize) const; const Reg& getReg() const; }; @@ -1298,15 +1299,15 @@ 
M_ripAddr }; XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e) - : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast), optimize_(true) + : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) { e_.verify(); } #ifdef XBYAK64 explicit XBYAK_CONSTEXPR Address(size_t disp) - : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false), optimize_(true) { } + : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), immSize(0), disp8N(0), permitVsib(false), broadcast_(false), optimize_(true) { } XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr) - : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast), optimize_(true) { } + : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) { } #endif RegExp getRegExp() const { @@ -1323,7 +1324,7 @@ const Label* getLabel() const { return label_; } bool operator==(const Address& rhs) const { - return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_; + return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && immSize == rhs.immSize && disp8N == rhs.disp8N && permitVsib == rhs.permitVsib && broadcast_ == rhs.broadcast_ && optimize_ == rhs.optimize_; } bool operator!=(const Address& rhs) const { return !operator==(rhs); } bool isVsib() const { return e_.isVsib(); } @@ -1331,6 +1332,11 @@ RegExp e_; const Label* label_; Mode mode_; +public: + int immSize; // the size of immediate value of nmemonics (0, 1, 2, 4) + int disp8N; // 0(normal), 1(force disp32), disp8N = {2, 4, 8} + bool permitVsib; +private: bool broadcast_; bool optimize_; 
}; @@ -1340,6 +1346,12 @@ assert(isMEM()); return static_cast(*this); } +inline Address Operand::getAddress(int immSize) const +{ + Address addr = getAddress(); + addr.immSize = immSize; + return addr; +} inline bool Operand::operator==(const Operand& rhs) const { @@ -1736,14 +1748,15 @@ db(0xD5); db((rexRXB(4, bit3, r, b, x) << 4) | rex4bit); } - void rex(const Operand& op1, const Operand& op2 = Operand(), uint64_t type = 0) + // return true if rex2 is selected + bool rex(const Operand& op1, const Operand& op2 = Operand(), uint64_t type = 0) { - if (op1.getNF() | op2.getNF()) XBYAK_THROW(ERR_INVALID_NF) - if (op1.getZU() | op2.getZU()) XBYAK_THROW(ERR_INVALID_ZU) + if (op1.getNF() | op2.getNF()) XBYAK_THROW_RET(ERR_INVALID_NF, false) + if (op1.getZU() | op2.getZU()) XBYAK_THROW_RET(ERR_INVALID_ZU, false) uint8_t rex = 0; const Operand *p1 = &op1, *p2 = &op2; if (p1->isMEM()) std::swap(p1, p2); - if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false) // except movsx(16bit, 32/64bit) bool p66 = (op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e)); if ((type & T_66) || p66) db(0x66); @@ -1753,6 +1766,7 @@ if (type & T_F3) { db(0xF3); } + bool is0F = type & T_0F; if (p2->isMEM()) { const Reg& r = *static_cast(p1); const Address& addr = p2->getAddress(); @@ -1762,9 +1776,9 @@ if (BIT == 64 && addr.is32bit()) db(0x67); rex = rexRXB(3, r.isREG(64), r, base, idx); if (r.hasRex2() || addr.hasRex2()) { - if (type & (T_0F|T_0F38|T_0F3A)) XBYAK_THROW(ERR_CANT_USE_REX2) - rex2(0, rex, r, base, idx); - return; + if (type & (T_0F38|T_0F3A)) XBYAK_THROW_RET(ERR_CANT_USE_REX2, false) + rex2(is0F, rex, r, base, idx); + return true; } if (rex || r.isExt8bit()) rex |= 0x40; } else { @@ -1773,13 +1787,14 @@ // ModRM(reg, base); rex = rexRXB(3, r1.isREG(64) || r2.isREG(64), r2, r1); if (r1.hasRex2() || r2.hasRex2()) { - if (type & (T_0F|T_0F38|T_0F3A)) XBYAK_THROW(ERR_CANT_USE_REX2) - rex2(0, rex, r2, r1); 
- return; + if (type & (T_0F38|T_0F3A)) XBYAK_THROW_RET(ERR_CANT_USE_REX2, 0) + rex2(is0F, rex, r2, r1); + return true; } if (rex || r1.isExt8bit() || r2.isExt8bit()) rex |= 0x40; } if (rex) db(rex); + return false; } // @@@begin of avx_type_def.h static const uint64_t T_NONE = 0ull; @@ -1826,16 +1841,14 @@ static const uint64_t T_MAP6 = T_FP16 | T_0F38; static const uint64_t T_NF = 1ull << 32; // T_nf static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8) - static const uint64_t T_MAP3 = 1ull << 34; // rorx only + static const uint64_t T_ND1 = 1ull << 35; // ND=1 static const uint64_t T_ZU = 1ull << 36; // ND=ZU static const uint64_t T_F2 = 1ull << 37; // pp = 3 - static const uint64_t T_MAP1 = 1ull << 38; // kmov // T_66 = 1, T_F3 = 2, T_F2 = 3 static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; } - static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; } - // @@@end of avx_type_def.h + static inline uint32_t getMap(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; } void vex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, bool x = false) { int w = (type & T_W1) ? 1 : 0; @@ -1849,7 +1862,7 @@ if (!b && !x && !w && (type & T_0F)) { db(0xC5); db((r ? 0 : 0x80) | vvvv); } else { - uint32_t mmmm = getMMM(type); + uint32_t mmmm = getMap(type); db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv); } db(code); @@ -1876,7 +1889,7 @@ { if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0) int w = (type & T_EW1) ? 1 : 0; - uint32_t mmm = getMMM(type); + uint32_t mmm = getMap(type); if (type & T_FP16) mmm |= 4; uint32_t pp = getPP(type); int idx = v ? 
v->getIdx() : 0; @@ -1928,17 +1941,10 @@ db(code); return disp8N; } - static inline int getMap(uint64_t type) - { - if (type & T_MAP1) return 1; - if (type & T_MAP3) return 3; - if (type & (T_0F38|T_0F3A)) return 2; - return 4; // legacy - } // evex of Legacy void evexLeg(const Reg& r, const Reg& b, const Reg& x, const Reg& v, uint64_t type, int sc = NONE) { - int M = getMap(type); + int M = getMap(type); if (M == 0) M = 4; // legacy int R3 = !r.isExtIdx(); int X3 = !x.isExtIdx(); int B3 = b.isExtIdx() ? 0 : 0x20; @@ -2031,9 +2037,9 @@ } LabelManager labelMgr_; bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } - void writeCode(uint64_t type, const Reg& r, int code) + void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false) { - if (!(type & T_APX)) { + if (!(type&T_APX || rex2)) { if (type & T_0F) { db(0x0F); } else if (type & T_0F38) { @@ -2046,16 +2052,18 @@ } void opRR(const Reg& reg1, const Reg& reg2, uint64_t type, int code) { - rex(reg2, reg1, type); - writeCode(type, reg1, code); + bool rex2 = rex(reg2, reg1, type); + writeCode(type, reg1, code, rex2); setModRM(3, reg1.getIdx(), reg2.getIdx()); } - void opMR(const Address& addr, const Reg& r, uint64_t type, int code, int immSize = 0) + void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE) { + if (code2 == NONE) code2 = code; + if (type2 && opROO(Reg(), addr, r, type2, code2)) return; if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) - rex(addr, r, type); - writeCode(type, r, code); - opAddr(addr, r.getIdx(), immSize); + bool rex2 = rex(addr, r, type); + writeCode(type, r, code, rex2); + opAddr(addr, r.getIdx()); } void opLoadSeg(const Address& addr, const Reg& reg, uint64_t type, int code) { @@ -2136,21 +2144,20 @@ } // reg is reg field of ModRM // immSize is the size for immediate value - // disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement - void opAddr(const 
Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false) + void opAddr(const Address &addr, int reg) { - if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + if (!addr.permitVsib && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) if (addr.getMode() == Address::M_ModRM) { - setSIB(addr.getRegExp(), reg, disp8N); + setSIB(addr.getRegExp(), reg, addr.disp8N); } else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) { setModRM(0, reg, 5); if (addr.getLabel()) { // [rip + Label] - putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize); + putL_inner(*addr.getLabel(), true, addr.getDisp() - addr.immSize); } else { size_t disp = addr.getDisp(); if (addr.getMode() == Address::M_ripAddr) { if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW) - disp -= (size_t)getCurr() + 4 + immSize; + disp -= (size_t)getCurr() + 4 + addr.immSize; } dd(inner::VerifyInInt32(disp)); } @@ -2207,11 +2214,12 @@ if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false) if (p2->isMEM()) { const Reg& r = *static_cast(p1); - const Address& addr = p2->getAddress(); + Address addr = p2->getAddress(); const RegExp e = addr.getRegExp(); evexLeg(r, e.getBase(), e.getIndex(), d, type, sc); writeCode(type, d, code); - opAddr(addr, r.getIdx(), immSize); + addr.immSize = immSize; + opAddr(addr, r.getIdx()); } else { evexLeg(static_cast(op2), static_cast(op1), Reg(), d, type, sc); writeCode(type, d, code); @@ -2226,13 +2234,18 @@ const Reg r(ext, Operand::REG, opBit); if ((type & T_APX) && op.hasRex2NFZU() && opROO(d ? 
*d : Reg(0, Operand::REG, opBit), op, r, type, code)) return; if (op.isMEM()) { - opMR(op.getAddress(), r, type, code, immSize); + opMR(op.getAddress(immSize), r, type, code); } else if (op.isREG(bit)) { opRR(r, op.getReg().changeBit(opBit), type, code); } else { XBYAK_THROW(ERR_BAD_COMBINATION) } } + void opSetCC(const Operand& op, int ext) + { + if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | ext)) return; + opRext(op, 8, 0, T_0F, 0x90 | ext); + } void opShift(const Operand& op, int imm, int ext, const Reg *d = 0) { if (d == 0) verifyMemHasSize(op); @@ -2252,7 +2265,7 @@ void opRO(const Reg& r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0) { if (op.isMEM()) { - opMR(op.getAddress(), r, type, code, immSize); + opMR(op.getAddress(immSize), r, type, code); } else if (condR) { opRR(r, op.getReg(), type, code); } else { @@ -2437,7 +2450,7 @@ void opVex(const Reg& r, const Operand *p1, const Operand& op2, uint64_t type, int code, int imm8 = NONE) { if (op2.isMEM()) { - const Address& addr = op2.getAddress(); + Address addr = op2.getAddress(); const RegExp& regExp = addr.getRegExp(); const Reg& base = regExp.getBase(); const Reg& index = regExp.getIndex(); @@ -2456,7 +2469,10 @@ } else { vex(r, base, p1, type, code, index.isExtIdx()); } - opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0); + if (type & T_VSIB) addr.permitVsib = true; + if (disp8N) addr.disp8N = disp8N; + if (imm8 != NONE) addr.immSize = 1; + opAddr(addr, r.getIdx()); } else { const Reg& base = op2.getReg(); if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) { @@ -2726,17 +2742,17 @@ code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0; } if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION) - uint64_t type = 0; + uint64_t type = T_0F; switch (size) { - case 8: type = T_W0|T_66; break; - case 16: type = T_W0; break; - case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break; - case 64: type = isReg ? 
T_W1|T_F2 : T_W1; break; + case 8: type |= T_W0|T_66; break; + case 16: type |= T_W0; break; + case 32: type |= isReg ? T_W0|T_F2 : T_W1|T_66; break; + case 64: type |= isReg ? T_W1|T_F2 : T_W1; break; } const Operand *p1 = &k, *p2 = &op; if (code == 0x93) { std::swap(p1, p2); } - if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return; - opVex(static_cast(*p1), 0, *p2, T_L0|T_0F|type, code); + if (opROO(Reg(), *p2, *p1, T_APX|type, code)) return; + opVex(static_cast(*p1), 0, *p2, T_L0|type, code); } void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2) { @@ -2951,7 +2967,7 @@ if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) immSize = 4; } - opMR(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0, 0xC6, immSize); + opMR(op.getAddress(immSize), Reg(0, Operand::REG, op.getBit()), 0, 0xC6); db(static_cast(imm), immSize); } else { XBYAK_THROW(ERR_BAD_COMBINATION) @@ -3142,6 +3158,18 @@ { opROO(Reg(), op, x, T_MUST_EVEX, 0xD9); } + void bswap(const Reg32e& r) + { + int idx = r.getIdx(); + uint8_t rex = (r.isREG(64) ? 8 : 0) | ((idx & 8) ? 
1 : 0); + if (idx >= 16) { + db(0xD5); db((1<<7) | (idx & 16) | rex); + } else { + if (rex) db(0x40 | rex); + db(0x0F); + } + db(0xC8 + (idx & 7)); + } /* use single byte nop if useMultiByteNop = false */ diff -Nru xbyak-7.02/xbyak/xbyak_mnemonic.h xbyak-7.05/xbyak/xbyak_mnemonic.h --- xbyak-7.02/xbyak/xbyak_mnemonic.h 2023-12-20 06:59:45.000000000 +0000 +++ xbyak-7.05/xbyak/xbyak_mnemonic.h 2024-01-03 11:13:13.000000000 +0000 @@ -1,6 +1,6 @@ -const char *getVersionString() const { return "7.02"; } -void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC); } -void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); } +const char *getVersionString() const { return "7.05"; } +void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); } +void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); } void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x10); } void adc(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NONE, 2); } @@ -34,8 +34,8 @@ void andnps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x55, isXMM_XMMorMEM); } void andpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x54, isXMM_XMMorMEM); } void andps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x54, isXMM_XMMorMEM); } -void aor(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38 | T_F2, 0x0FC); } -void axor(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38 | T_F3, 0x0FC); } +void aor(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_F2, 0x0FC, T_APX|T_F2); } +void axor(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_F3, 0x0FC, T_APX|T_F3); } void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf7); } void blendpd(const Xmm& xmm, 
const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0D, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); } void blendps(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0C, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); } @@ -55,7 +55,6 @@ void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, T_0F, 0x1B); } void bsf(const Reg&reg, const Operand& op) { opRO(reg, op, T_0F, 0xBC, op.isREG(16|i32e)); } void bsr(const Reg&reg, const Operand& op) { opRO(reg, op, T_0F, 0xBD, op.isREG(16|i32e)); } -void bswap(const Reg32e& reg) { opRR(Reg32(1), reg, 0, 0x0F); } void bt(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xA3, op.isREG(16|i32e) && op.getBit() == reg.getBit()); } void bt(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 4, T_0F, 0xba, false, 1); db(imm); } void btc(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xBB, op.isREG(16|i32e) && op.getBit() == reg.getBit()); } @@ -66,62 +65,62 @@ void bts(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 5, T_0F, 0xba, false, 1); db(imm); } void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf5); } void cbw() { db(0x66); db(0x98); } -void ccmpa(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 7); } -void ccmpa(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 7); } -void ccmpae(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 3); } -void ccmpae(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 3); } -void ccmpb(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 2); } -void ccmpb(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 2); } -void ccmpbe(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 6); } -void ccmpbe(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 6); } -void ccmpc(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 2); } -void 
ccmpc(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 2); } -void ccmpe(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 4); } -void ccmpe(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 4); } -void ccmpf(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 11); } -void ccmpf(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 11); } -void ccmpg(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 15); } -void ccmpg(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 15); } -void ccmpge(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 13); } -void ccmpge(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 13); } -void ccmpl(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 12); } -void ccmpl(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 12); } -void ccmple(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 14); } -void ccmple(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 14); } -void ccmpna(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 6); } -void ccmpna(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 6); } -void ccmpnae(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 2); } -void ccmpnae(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 2); } -void ccmpnb(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 3); } -void ccmpnb(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 3); } -void ccmpnbe(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 7); } -void ccmpnbe(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 7); } -void ccmpnc(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 3); } -void ccmpnc(const Operand& op1, const 
Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 3); } -void ccmpne(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 5); } -void ccmpne(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 5); } -void ccmpng(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 14); } -void ccmpng(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 14); } -void ccmpnge(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 12); } -void ccmpnge(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 12); } -void ccmpnl(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 13); } -void ccmpnl(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 13); } -void ccmpnle(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 15); } -void ccmpnle(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 15); } -void ccmpno(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 1); } -void ccmpno(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 1); } -void ccmpns(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 9); } -void ccmpns(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 9); } -void ccmpnz(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 5); } -void ccmpnz(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 5); } -void ccmpo(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 0); } -void ccmpo(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 0); } -void ccmps(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 8); } -void ccmps(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 8); } -void ccmpt(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 10); } -void ccmpt(const Operand& op1, const Operand& op2, int dfv) { 
opCcmp(op1, op2, dfv, 0x38, 10); } -void ccmpz(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 4); } -void ccmpz(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 4); } +void ccmpa(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 7); } +void ccmpa(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 7); } +void ccmpae(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 3); } +void ccmpae(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 3); } +void ccmpb(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 2); } +void ccmpb(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 2); } +void ccmpbe(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 6); } +void ccmpbe(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 6); } +void ccmpc(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 2); } +void ccmpc(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 2); } +void ccmpe(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 4); } +void ccmpe(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 4); } +void ccmpf(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 11); } +void ccmpf(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 11); } +void ccmpg(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 15); } +void ccmpg(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 15); } +void ccmpge(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 13); } +void ccmpge(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 13); } +void ccmpl(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 12); } +void ccmpl(const Operand& op1, 
const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 12); } +void ccmple(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 14); } +void ccmple(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 14); } +void ccmpna(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 6); } +void ccmpna(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 6); } +void ccmpnae(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 2); } +void ccmpnae(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 2); } +void ccmpnb(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 3); } +void ccmpnb(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 3); } +void ccmpnbe(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 7); } +void ccmpnbe(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 7); } +void ccmpnc(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 3); } +void ccmpnc(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 3); } +void ccmpne(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 5); } +void ccmpne(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 5); } +void ccmpng(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 14); } +void ccmpng(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 14); } +void ccmpnge(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 12); } +void ccmpnge(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 12); } +void ccmpnl(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 13); } +void ccmpnl(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 13); } +void ccmpnle(const Operand& op, int imm, int dfv = 0) 
{ opCcmpi(op, imm, dfv, 15); } +void ccmpnle(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 15); } +void ccmpno(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 1); } +void ccmpno(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 1); } +void ccmpns(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 9); } +void ccmpns(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 9); } +void ccmpnz(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 5); } +void ccmpnz(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 5); } +void ccmpo(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 0); } +void ccmpo(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 0); } +void ccmps(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 8); } +void ccmps(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 8); } +void ccmpt(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 10); } +void ccmpt(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 10); } +void ccmpz(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 4); } +void ccmpz(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 4); } void cdq() { db(0x99); } void cfcmovb(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x42); } void cfcmovb(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x42); } @@ -271,62 +270,62 @@ void comiss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x2F, isXMM_XMMorMEM); } void cpuid() { db(0x0F); db(0xA2); } void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 
0 : 1); uint64_t type = op.isBit(16) ? T_66:0; if (opROO(Reg(), op, static_cast<const Reg&>(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); } -void ctesta(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 7); } -void ctesta(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 7); } -void ctestae(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 3); } -void ctestae(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 3); } -void ctestb(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 2); } -void ctestb(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 2); } -void ctestbe(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 6); } -void ctestbe(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 6); } -void ctestc(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 2); } -void ctestc(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 2); } -void cteste(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 4); } -void cteste(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 4); } -void ctestf(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 11); } -void ctestf(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 11); } -void ctestg(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 15); } -void ctestg(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 15); } -void ctestge(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 13); } -void ctestge(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 13); } -void ctestl(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 12); } -void ctestl(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 12); } -void ctestle(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 14); } -void ctestle(const Operand& op, int imm, int dfv) { opTesti(op, 
imm, dfv, 14); } -void ctestna(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 6); } -void ctestna(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 6); } -void ctestnae(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 2); } -void ctestnae(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 2); } -void ctestnb(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 3); } -void ctestnb(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 3); } -void ctestnbe(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 7); } -void ctestnbe(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 7); } -void ctestnc(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 3); } -void ctestnc(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 3); } -void ctestne(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 5); } -void ctestne(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 5); } -void ctestng(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 14); } -void ctestng(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 14); } -void ctestnge(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 12); } -void ctestnge(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 12); } -void ctestnl(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 13); } -void ctestnl(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 13); } -void ctestnle(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 15); } -void ctestnle(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 15); } -void ctestno(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 1); } -void ctestno(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 1); } -void ctestns(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 9); } -void 
ctestns(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 9); } -void ctestnz(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 5); } -void ctestnz(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 5); } -void ctesto(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 0); } -void ctesto(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 0); } -void ctests(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 8); } -void ctests(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 8); } -void ctestt(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 10); } -void ctestt(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 10); } -void ctestz(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 4); } -void ctestz(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 4); } +void ctesta(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 7); } +void ctesta(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 7); } +void ctestae(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 3); } +void ctestae(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 3); } +void ctestb(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 2); } +void ctestb(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 2); } +void ctestbe(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 6); } +void ctestbe(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 6); } +void ctestc(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 2); } +void ctestc(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 2); } +void cteste(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 4); } +void cteste(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 4); } +void ctestf(const 
Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 11); } +void ctestf(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 11); } +void ctestg(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 15); } +void ctestg(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 15); } +void ctestge(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 13); } +void ctestge(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 13); } +void ctestl(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 12); } +void ctestl(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 12); } +void ctestle(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 14); } +void ctestle(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 14); } +void ctestna(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 6); } +void ctestna(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 6); } +void ctestnae(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 2); } +void ctestnae(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 2); } +void ctestnb(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 3); } +void ctestnb(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 3); } +void ctestnbe(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 7); } +void ctestnbe(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 7); } +void ctestnc(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 3); } +void ctestnc(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 3); } +void ctestne(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 5); } +void ctestne(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 5); } +void ctestng(const Operand& op, const Reg& r, int 
dfv = 0) { opCcmp(op, r, dfv, 0x84, 14); } +void ctestng(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 14); } +void ctestnge(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 12); } +void ctestnge(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 12); } +void ctestnl(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 13); } +void ctestnl(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 13); } +void ctestnle(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 15); } +void ctestnle(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 15); } +void ctestno(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 1); } +void ctestno(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 1); } +void ctestns(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 9); } +void ctestns(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 9); } +void ctestnz(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 5); } +void ctestnz(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 5); } +void ctesto(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 0); } +void ctesto(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 0); } +void ctests(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 8); } +void ctests(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 8); } +void ctestt(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 10); } +void ctestt(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 10); } +void ctestz(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 4); } +void ctestz(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 4); } void cvtdq2pd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F, 0xE6, 
isXMM_XMMorMEM); } void cvtdq2ps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5B, isXMM_XMMorMEM); } void cvtpd2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F, 0xE6, isXMM_XMMorMEM); } @@ -685,15 +684,15 @@ void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_66); } void movaps(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x29); } void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_NONE); } -void movbe(const Address& addr, const Reg& reg) { if (opROO(Reg(), addr, reg, T_APX, 0x61)) return; opMR(addr, reg, T_0F38, 0xF1); } -void movbe(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg, T_APX, 0x60)) return; opMR(addr, reg, T_0F38, 0xF0); } +void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); } +void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); } void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); } void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); } void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); } void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); } void movddup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12, isXMM_XMMorMEM, NONE); } -void movdir64b(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg.cvt32(), T_APX|T_66, 0xF8)) return; opMR(addr, reg.cvt32(), T_66 | T_0F38, 0xF8); } -void movdiri(const Address& addr, const Reg32e& reg) { if (opROO(Reg(), addr, reg, T_APX, 0xF9)) return; opMR(addr, reg, T_0F38, 0xF9); } +void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); } +void movdiri(const Address& addr, const 
Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); } void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); } void movdqa(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x7F); } void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_66); } @@ -932,7 +931,7 @@ void ror(const Operand& op, int imm) { opShift(op, imm, 9); } void ror(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 9, &d); } void ror(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 9, &d); } -void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX|T_MAP3, 0xF0, imm); } +void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX, 0xF0, imm); } void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x09, isXMM_XMMorMEM, imm); } void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x08, isXMM_XMMorMEM, imm); } void roundsd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0B, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); } @@ -957,36 +956,36 @@ void scasd() { db(0xAF); } void scasw() { db(0x66); db(0xAF); } void serialize() { db(0x0F); db(0x01); db(0xE8); } -void seta(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 7)) return; opRext(op, 8, 0, T_0F, 0x90 | 7); }//-V524 -void setae(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524 -void setb(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524 -void setbe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 6)) return; opRext(op, 8, 0, T_0F, 0x90 | 6); }//-V524 -void setc(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) 
return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524 -void sete(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524 -void setg(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 15)) return; opRext(op, 8, 0, T_0F, 0x90 | 15); }//-V524 -void setge(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 13)) return; opRext(op, 8, 0, T_0F, 0x90 | 13); }//-V524 -void setl(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 12)) return; opRext(op, 8, 0, T_0F, 0x90 | 12); }//-V524 -void setle(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 14)) return; opRext(op, 8, 0, T_0F, 0x90 | 14); }//-V524 -void setna(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 6)) return; opRext(op, 8, 0, T_0F, 0x90 | 6); }//-V524 -void setnae(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524 -void setnb(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524 -void setnbe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 7)) return; opRext(op, 8, 0, T_0F, 0x90 | 7); }//-V524 -void setnc(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524 -void setne(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 5)) return; opRext(op, 8, 0, T_0F, 0x90 | 5); }//-V524 -void setng(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 14)) return; opRext(op, 8, 0, T_0F, 0x90 | 14); }//-V524 -void setnge(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 12)) return; opRext(op, 8, 0, T_0F, 0x90 | 12); }//-V524 -void setnl(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 13)) return; opRext(op, 8, 0, 
T_0F, 0x90 | 13); }//-V524 -void setnle(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 15)) return; opRext(op, 8, 0, T_0F, 0x90 | 15); }//-V524 -void setno(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 1)) return; opRext(op, 8, 0, T_0F, 0x90 | 1); }//-V524 -void setnp(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 11)) return; opRext(op, 8, 0, T_0F, 0x90 | 11); }//-V524 -void setns(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 9)) return; opRext(op, 8, 0, T_0F, 0x90 | 9); }//-V524 -void setnz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 5)) return; opRext(op, 8, 0, T_0F, 0x90 | 5); }//-V524 -void seto(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 0)) return; opRext(op, 8, 0, T_0F, 0x90 | 0); }//-V524 -void setp(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 10)) return; opRext(op, 8, 0, T_0F, 0x90 | 10); }//-V524 -void setpe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 10)) return; opRext(op, 8, 0, T_0F, 0x90 | 10); }//-V524 -void setpo(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 11)) return; opRext(op, 8, 0, T_0F, 0x90 | 11); }//-V524 -void sets(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 8)) return; opRext(op, 8, 0, T_0F, 0x90 | 8); }//-V524 -void setz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524 +void seta(const Operand& op) { opSetCC(op, 7); }//-V524 +void setae(const Operand& op) { opSetCC(op, 3); }//-V524 +void setb(const Operand& op) { opSetCC(op, 2); }//-V524 +void setbe(const Operand& op) { opSetCC(op, 6); }//-V524 +void setc(const Operand& op) { opSetCC(op, 2); }//-V524 +void sete(const Operand& op) { opSetCC(op, 4); }//-V524 +void setg(const Operand& op) { opSetCC(op, 15); }//-V524 +void setge(const 
Operand& op) { opSetCC(op, 13); }//-V524 +void setl(const Operand& op) { opSetCC(op, 12); }//-V524 +void setle(const Operand& op) { opSetCC(op, 14); }//-V524 +void setna(const Operand& op) { opSetCC(op, 6); }//-V524 +void setnae(const Operand& op) { opSetCC(op, 2); }//-V524 +void setnb(const Operand& op) { opSetCC(op, 3); }//-V524 +void setnbe(const Operand& op) { opSetCC(op, 7); }//-V524 +void setnc(const Operand& op) { opSetCC(op, 3); }//-V524 +void setne(const Operand& op) { opSetCC(op, 5); }//-V524 +void setng(const Operand& op) { opSetCC(op, 14); }//-V524 +void setnge(const Operand& op) { opSetCC(op, 12); }//-V524 +void setnl(const Operand& op) { opSetCC(op, 13); }//-V524 +void setnle(const Operand& op) { opSetCC(op, 15); }//-V524 +void setno(const Operand& op) { opSetCC(op, 1); }//-V524 +void setnp(const Operand& op) { opSetCC(op, 11); }//-V524 +void setns(const Operand& op) { opSetCC(op, 9); }//-V524 +void setnz(const Operand& op) { opSetCC(op, 5); }//-V524 +void seto(const Operand& op) { opSetCC(op, 0); }//-V524 +void setp(const Operand& op) { opSetCC(op, 10); }//-V524 +void setpe(const Operand& op) { opSetCC(op, 10); }//-V524 +void setpo(const Operand& op) { opSetCC(op, 11); }//-V524 +void sets(const Operand& op) { opSetCC(op, 8); }//-V524 +void setz(const Operand& op) { opSetCC(op, 4); }//-V524 void sfence() { db(0x0F); db(0xAE); db(0xF8); } void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); } void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); }