From f9ab077908b001995a271761a7a4ce4cf2047d30 Mon Sep 17 00:00:00 2001 From: Malcolm Jestadt Date: Sun, 26 Sep 2021 04:40:47 -0400 Subject: [PATCH] SPU LLVM: Use VDBPSADBW in SUMB - This instruction can be used to sum bytes horizontally if the second input vector is all zeroes. --- rpcs3/Emu/CPU/CPUTranslator.h | 15 +++++++++++++++ rpcs3/Emu/Cell/SPURecompiler.cpp | 17 +++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 52d3f690d0..d25783989a 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -3366,6 +3366,21 @@ public: return result; } + template + value_t vdbpsadbw(T1 a, T2 b, u8 c) + { + value_t result; + + const auto data0 = a.eval(m_ir); + const auto data1 = b.eval(m_ir); + + const auto immediate = (llvm_const_int{c}); + const auto imm8 = immediate.eval(m_ir); + + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_dbpsadbw_128), {data0, data1, imm8}); + return result; + } + template value_t vpermb(T1 a, T2 b) { diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 000e61c0fd..cc34903816 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7022,6 +7022,23 @@ public: void SUMB(spu_opcode_t op) { + if (m_use_avx512) + { + const auto [a, b] = get_vrs(op.ra, op.rb); + const auto zeroes = splat(0); + + if (op.ra == op.rb && !m_interp_magn) + { + set_vr(op.rt, vdbpsadbw(a, zeroes, 0)); + return; + } + + const auto ax = vdbpsadbw(a, zeroes, 0); + const auto bx = vdbpsadbw(b, zeroes, 0); + set_vr(op.rt, shuffle2(ax, bx, 0, 8, 2, 10, 4, 12, 6, 14)); + return; + } + if (m_use_vnni) { const auto [a, b] = get_vrs(op.ra, op.rb);