From 4ce2ad54a8f957b04834bbdfc856e07020456b2c Mon Sep 17 00:00:00 2001
From: Whatcookie
Date: Sat, 8 Aug 2020 20:50:26 -0400
Subject: [PATCH] PPU LLVM: Use VPERM2B to emulate VPERM (#8704)

- The VPERM2B instructions are a match of VPERM's behavior, besides operating in reverse byte order
---
 rpcs3/Emu/CPU/CPUTranslator.cpp  |  9 +++++++++
 rpcs3/Emu/CPU/CPUTranslator.h    | 43 +++++++++++++++++++++++++++++++++++++++++++
 rpcs3/Emu/Cell/PPUTranslator.cpp |  9 +++++++++
 3 files changed, 61 insertions(+)

diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp
index 5339012e0d..5b653be6fe 100644
--- a/rpcs3/Emu/CPU/CPUTranslator.cpp
+++ b/rpcs3/Emu/CPU/CPUTranslator.cpp
@@ -57,6 +57,15 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
 	{
 		m_use_fma = true;
 	}
+
+	// Test AVX-512_icelake features (TODO)
+	if (cpu == "icelake" ||
+		cpu == "icelake-client" ||
+		cpu == "icelake-server" ||
+		cpu == "tigerlake")
+	{
+		m_use_avx512_icl = true;
+	}
 }
 
 llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type)
diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h
index 56e6c7d7f0..f9f10610d4 100644
--- a/rpcs3/Emu/CPU/CPUTranslator.h
+++ b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -2423,6 +2423,9 @@ protected:
 	// Allow FMA
 	bool m_use_fma = false;
 
+	// Allow Icelake tier AVX-512
+	bool m_use_avx512_icl = false;
+
 	// IR builder
 	llvm::IRBuilder<>* m_ir;
 
@@ -2782,6 +2785,46 @@ public:
 		return result;
 	}
 
+	// Two-table byte permute: emitted as the VPERMI2B intrinsic, or as a plain shufflevector when the index is constant.
+	// a, b: the two 16-byte tables; c: 16 index bytes of which only the low 5 bits are significant -
+	// bit 4 selects the table (0 = a, 1 = b) and bits 0..3 select the byte within it.
+	template <typename T1, typename T2, typename T3>
+	value_t<u8[16]> vperm2b(T1 a, T2 b, T3 c)
+	{
+		value_t<u8[16]> result;
+
+		const auto data0 = a.eval(m_ir);
+		const auto data1 = b.eval(m_ir);
+		const auto index = c.eval(m_ir);
+
+		if (const auto ci = llvm::dyn_cast<llvm::Constant>(index))
+		{
+			// Convert VPERM2B index back to LLVM vector shuffle mask
+			const auto cv = llvm::dyn_cast<llvm::ConstantDataVector>(ci);
+
+			if (cv || llvm::isa<llvm::ConstantAggregateZero>(ci))
+			{
+				// An all-zero index is a ConstantAggregateZero, not a ConstantDataVector, so cv may be
+				// null here: the zero-initialized mask already describes that case.
+				u8 mask[16]{};
+
+				for (u32 i = 0; cv && i < 16; i++)
+				{
+					// Drop the bits the instruction ignores: shufflevector rejects indices above 31
+					mask[i] = static_cast<u8>(cv->getElementAsInteger(i) & 0x1f);
+				}
+
+				result.value = llvm::ConstantDataVector::get(m_ir->getContext(), llvm::ArrayRef<u8>(mask));
+				result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>());
+				result.value = m_ir->CreateShuffleVector(data0, data1, result.value);
+				return result;
+			}
+		}
+
+		result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_vpermi2var_qi_128), {data0, index, data1});
+		return result;
+	}
+
 	template <typename T1, typename T2>
 	value_t<u8[16]> pshufb(T1 a, T2 b)
 	{
diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
index 9900e2a467..cffa9be331 100644
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -1295,6 +1295,15 @@ void PPUTranslator::VPERM(ppu_opcode_t op)
 	const auto a = get_vr<u8[16]>(op.va);
 	const auto b = get_vr<u8[16]>(op.vb);
 	const auto c = get_vr<u8[16]>(op.vc);
+
+	// VPERM2B only reads the low 5 bits of each index byte, so the inverted index needs no masking here
+	if (m_use_avx512_icl && op.va != op.vb)
+	{
+		const auto i = eval(~c);
+		set_vr(op.vd, vperm2b(b, a, i));
+		return;
+	}
+
 	const auto i = eval(~c & 0x1f);
 	set_vr(op.vd, select(noncast<s8[16]>(c << 3) >= 0, pshufb(a, i), pshufb(b, i)));
 }