// Patch summary: adds the push2 / push2p / pop2 / pop2p emitters.
//
// gen/gen_code.cpp (hunk inside put64()):
//   Four new puts() calls whose string arguments match, character for
//   character, the four lines this patch adds to xbyak/xbyak_mnemonic.h.
//
// xbyak/xbyak_mnemonic.h:
//   push2 / push2p call opROO(r1, r2, Reg64(6), ..., 0xFF);
//   pop2  / pop2p  call opROO(r1, r2, Reg64(0), ..., 0x8F).
//   All four pass T_VEX|T_ND1; the plain forms add T_W0 and the p-suffixed
//   forms add T_W1. That flag is the only difference between each pair.
//
// xbyak/xbyak.h (CodeGenerator member; its header is outside the hunk):
//   `w` used to be r.isBit(64) || v.isBit(64) || (type & T_W1).
//   It is now 0 whenever T_W0 is set in `type`, and the old expression
//   otherwise. The new emitters take Reg64 operands, so without this
//   override the T_W0 forms would still get w != 0.
//
// test/apx.cpp:
//   New test push2_pop2 emits eight instructions (two per mnemonic) and
//   compares the generated size and bytes against the table `tbl`.
//
// NOTE(review): these summary lines sit before the first "diff --git"
// header; presumably patch tools skip leading text -- confirm before applying.
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 312b2177..44d1f361 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1941,6 +1941,10 @@ void put64() puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }"); puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }"); puts("void jmpabs(uint64_t addr) { db(0xD5); db(0x00); db(0xA1); dq(addr); }"); + puts("void push2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_VEX|T_ND1|T_W0, 0xFF); }"); + puts("void push2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_VEX|T_ND1|T_W1, 0xFF); }"); + puts("void pop2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_VEX|T_ND1|T_W0, 0x8F); }"); + puts("void pop2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_VEX|T_ND1|T_W1, 0x8F); }"); // CMPccXADD { const struct Tbl { diff --git a/test/apx.cpp b/test/apx.cpp index ba767e23..3e0b0afb 100644 --- a/test/apx.cpp +++ b/test/apx.cpp @@ -1397,3 +1397,38 @@ CYBOZU_TEST_AUTO(shift_3op) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } + +CYBOZU_TEST_AUTO(push2_pop2) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + push2(r20, r30); + push2(rax, rcx); + push2p(r20, r30); + push2p(rdx, r8); + + pop2(rax, rcx); + pop2(r20, r30); + pop2p(rax, rcx); + pop2p(r20, r30); + } + } c; + const uint8_t tbl[] = { + // push2 + 0x62, 0xdc, 0x5c, 0x10, 0xff, 0xf6, + 0x62, 0xf4, 0x7c, 0x18, 0xff, 0xf1, + // push2p (T_W1 form of push2) 
+ 0x62, 0xdc, 0xdc, 0x10, 0xff, 0xf6, + 0x62, 0xd4, 0xec, 0x18, 0xff, 0xf0, + // pop2 + 0x62, 0xf4, 0x7c, 0x18, 0x8f, 0xc1, + 0x62, 0xdc, 0x5c, 0x10, 0x8f, 0xc6, + // pop2p + 0x62, 0xf4, 0xfc, 0x18, 0x8f, 0xc1, + 0x62, 0xdc, 0xdc, 0x10, 0x8f, 0xc6, + }; + const size_t n = sizeof(tbl); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index d4c5b14c..e092b05b 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1927,7 +1927,7 @@ class CodeGenerator : public CodeArray { int B3 = b.isExtIdx() ? 0 : 0x20; int R4 = r.isExtIdx2() ? 0 : 0x10; int B4 = b.isExtIdx2() ? 0x08 : 0; - int w = r.isBit(64) || v.isBit(64) || (type & T_W1); + int w = (type & T_W0) ? 0 : (r.isBit(64) || v.isBit(64) || (type & T_W1)); int V = (~v.getIdx() & 15) << 3; int X4 = x.isExtIdx2() ? 0 : 0x04; int pp = (type & (T_F2|T_F3|T_66)) ? getPP(type) : (r.isBit(16) || v.isBit(16)); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 54623d5d..b5a29332 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1732,6 +1732,10 @@ void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx() void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); } void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); } void jmpabs(uint64_t addr) { db(0xD5); db(0x00); db(0xA1); dq(addr); } +void push2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_VEX|T_ND1|T_W0, 0xFF); } +void push2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_VEX|T_ND1|T_W1, 0xFF); } +void pop2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_VEX|T_ND1|T_W0, 0x8F); } +void pop2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_VEX|T_ND1|T_W1, 0x8F); } void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, 
r2, addr, T_66 | T_0F38, 0xE6); } void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_66 | T_0F38, 0xE2); } void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_66 | T_0F38, 0xEE); }