SPU LLVM: Use VDBPSADBW in SUMB

- This instruction can be used to sum bytes horizontally if the second input vector is all zeroes.
This commit is contained in:
Malcolm Jestadt 2021-09-26 04:40:47 -04:00 committed by Ivan
parent 8b3a3e4ac8
commit f9ab077908
2 changed files with 32 additions and 0 deletions

View file

@ -7022,6 +7022,23 @@ public:
void SUMB(spu_opcode_t op)
{
if (m_use_avx512)
{
const auto [a, b] = get_vrs<u8[16]>(op.ra, op.rb);
const auto zeroes = splat<u8[16]>(0);
if (op.ra == op.rb && !m_interp_magn)
{
set_vr(op.rt, vdbpsadbw(a, zeroes, 0));
return;
}
const auto ax = vdbpsadbw(a, zeroes, 0);
const auto bx = vdbpsadbw(b, zeroes, 0);
set_vr(op.rt, shuffle2(ax, bx, 0, 8, 2, 10, 4, 12, 6, 14));
return;
}
if (m_use_vnni)
{
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);