SPU: rewrite ROTH (AVX-512)

This commit is contained in:
Nekotekina 2018-01-03 01:03:35 +03:00
parent af3e1fc580
commit 83b541ea9d

View file

@ -762,23 +762,18 @@ void spu_recompiler::ROTH(spu_opcode_t op) //nf
{ {
if (utils::has_512()) if (utils::has_512())
{ {
// Trying to implement 16-bit rotates using 32-bit rotates and only XMM registers.
// 1) Cannot use YMM/ZMM: transition penalty in mixed code, CPU frequency penalty.
// 2) Cross-lane instructions like VPMOVZX are expensive.
const XmmLink& va = XmmGet(op.ra, XmmType::Int); const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int); const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& v1 = XmmAlloc(); const XmmLink& vt = XmmAlloc();
const XmmLink& v2 = XmmAlloc(); const XmmLink& v4 = XmmAlloc();
c->vpunpckhwd(v1, va, va); c->movdqa(v4, XmmConst(_mm_set1_epi16(0xf)));
c->vpunpcklwd(v2, va, va); c->pand(vb, v4);
c->vpunpckhwd(va, vb, vb); c->vpsllvw(vt, va, vb);
c->vpunpcklwd(vb, vb, vb); c->psubw(vb, XmmConst(_mm_set1_epi16(1)));
c->vprolvd(va, v1, va); c->pandn(vb, v4);
c->vprolvd(vb, v2, vb); c->vpsrlvw(va, va, vb);
c->psrad(va, 16); c->por(vt, va);
c->psrad(vb, 16); c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
c->packssdw(vb, va);
c->movdqa(SPU_OFF_128(gpr, op.rt), vb);
return; return;
} }