Skip to content

Commit

Permalink
vmovw supports avx10.2
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 14, 2024
1 parent 46238d9 commit 0c2f7fc
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 35 deletions.
7 changes: 4 additions & 3 deletions doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ vpdpbusd(xm0, xm1, xm2); // VEX
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI)
vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above
vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2)
setDefaultEncoding(VexEncoding, AVX10p2Encoding); // use 2nd argument.
setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument.
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2)
```

Expand All @@ -133,9 +133,10 @@ Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param.

param|vnniEnc|avx10Enc
-|-|-
VexEncoding|AVX-VNNI|AVX-VNNI-INT8
VexEncoding|AVX-VNNI|-
EvexEncoding|AVX512-VNNI|-
AVX10p2Encoding|-|AVX10.2
PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16
AVX10v2Encoding|-|AVX10.2
default|EvexEncoding|VexEncoding
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw

Expand Down
7 changes: 0 additions & 7 deletions gen/gen_avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ void putM_X()
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K },
{ 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down Expand Up @@ -1079,12 +1078,6 @@ void putFP16_2()
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str());
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str());
}
{
uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
std::string s = type2String(type);
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str());
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str());
}
}

void putFP16()
Expand Down
2 changes: 1 addition & 1 deletion test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64

TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt minmax.txt saturation.txt
TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
xed_test:
@set -e; \
for target in $(addprefix avx10/, $(TEST_FILES)); do \
Expand Down
7 changes: 7 additions & 0 deletions test/avx10/misc.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// AVX10 integer and FP16 VNNI, media and zero-extending
vdpphps(xm1, xm2, xm3);
vdpphps(xm1, xm2, ptr[rax+128]);
vdpphps(xm1, xm2, ptr_b[rax+128]);
Expand Down Expand Up @@ -168,5 +169,11 @@ vpdpwuuds(zm1, zm2, ptr_b[rax+128]);

//
vmovd(xm10, xm20);
vmovd(xm1, xm2);
vmovd(xm10, ptr[rax+128]);
vmovd(ptr[rax+128], xm30);
//
vmovw(xm1, xm20);
vmovw(xm1, xm2);
vmovw(xm3, ptr [rax+0x40]);
vmovw(ptr [rax+0x40], xm7);
4 changes: 0 additions & 4 deletions test/avx10/old.txt
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,6 @@ vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
vmovsh(ptr [rax+0x40]|k1, xmm1);
vmovsh(xmm1|k2|T_z, xmm3, xmm5);
vmovw(xmm1, r13d);
vmovw(xmm3, ptr [rax+0x40]);
vmovw(r9d, xmm1);
vmovw(ptr [rax+0x40], xmm7);
vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
Expand Down
2 changes: 1 addition & 1 deletion test/test_by_xed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096*8)
{
setDefaultEncoding(EvexEncoding, AVX10p2Encoding);
setDefaultEncoding(EvexEncoding, AVX10v2Encoding);
#include "tmp.cpp"
}
};
Expand Down
47 changes: 31 additions & 16 deletions xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -1674,8 +1674,8 @@ typedef enum {
DefaultEncoding,
VexEncoding,
EvexEncoding,
AVX512Encoding = EvexEncoding,
AVX10p2Encoding
PreAVX10v2Encoding = EvexEncoding,
AVX10v2Encoding
} PreferredEncoding;

class CodeGenerator : public CodeArray {
Expand Down Expand Up @@ -3177,9 +3177,9 @@ class CodeGenerator : public CodeArray {
#endif

// set default encoding
// vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex)
// avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (AVX10p2Encoding)
void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)
// vnniEnc : AVX512_VNNI (default:EvexEncoding) or AVX-VNNI (VexEncoding)
// avx10Enc : mpsadbw etc., AVX-VNNI-INT8/AVX512-FP16 (default:PreAVX10v2Encoding) or AVX10.2 (AVX10v2Encoding)
void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding)
{ defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; }

void bswap(const Reg32e& r)
Expand All @@ -3194,7 +3194,8 @@ class CodeGenerator : public CodeArray {
}
db(0xC8 + (idx & 7));
}
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
// AVX10 zero-extending for vmovd, vmovw
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding encoding, int bit)
{
const Operand *p1 = &op1;
const Operand *p2 = &op2;
Expand All @@ -3208,18 +3209,32 @@ class CodeGenerator : public CodeArray {
std::swap(p1, p2);
rev = !rev;
}
if (getEncoding(encoding, 1) == AVX10p2Encoding) {
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) {
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|(rev ? T_F3 : T_66)|T_MUST_EVEX|T_0F|T_EW0|T_N4, rev ? 0x7E : 0xD6);
return;
}
int sel = -1;
if (getEncoding(encoding, 1) == AVX10v2Encoding) {
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
} else {
if ((p1->isREG(32) || p1->isMEM()) && p2->isXMM()) {
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|T_66|T_0F|T_W0|T_N4, rev ? 0x6E : 0x7E);
return;
}
if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
}
XBYAK_THROW(ERR_BAD_COMBINATION)
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
}
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 32);
}
void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 16|32|64);
}
/*
use single byte nop if useMultiByteNop = false
Expand Down
3 changes: 0 additions & 3 deletions xbyak/xbyak_mnemonic.h
Original file line number Diff line number Diff line change
Expand Up @@ -2422,9 +2422,6 @@ void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F
void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_M_K, 0x11); }
void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1); }
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
Expand Down

0 comments on commit 0c2f7fc

Please sign in to comment.