Migrate PPU to post-process execution model

kd-11 2024-08-04 18:33:28 +03:00 committed by kd-11
parent a976ac3353
commit 34549445a8
5 changed files with 58 additions and 72 deletions

View file

@@ -23,7 +23,7 @@ namespace aarch64
     using function_info_t = GHC_frame_preservation_pass::function_info_t;

     GHC_frame_preservation_pass::GHC_frame_preservation_pass(
-        gprs base_reg,
+        gpr base_reg,
         u32 hv_ctx_offset,
         std::function<bool(const std::string&)> exclusion_callback)
     {
@@ -226,7 +226,7 @@ namespace aarch64
             "add x30, x%u, x30;\n" // Add to base register
             "ldr x30, [x30];\n",   // Load x30
             execution_context.hypervisor_context_offset,
-            execution_context.base_register);
+            static_cast<u32>(execution_context.base_register));

         if (function_info.stack_frame_size > 0)
         {
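
Note on the second hunk: `base_register` is an enum being fed to a `%u` conversion in a printf-style format string, so without the explicit `static_cast<u32>` the value's promoted type is not guaranteed to match the specifier (and type-checked formatters may reject the enum outright). A minimal standalone sketch of the same pitfall, using the standard printf for illustration (illustrative names, not RPCS3 code):

    // Illustrative sketch: why an enum needs an explicit cast before
    // a printf-style "%u" conversion.
    #include <cstdint>
    #include <cstdio>

    enum gpr : std::int32_t { x0 = 0, x1, x2 };

    int main()
    {
        gpr base = x2;

        // Risky: "base" promotes to int, but "%u" expects unsigned int.
        // std::printf("add x30, x%u, x30;\n", base);

        // Well-defined: cast to the unsigned type the specifier expects.
        std::printf("add x30, x%u, x30;\n", static_cast<std::uint32_t>(base));
        return 0;
    }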

View file

@@ -11,7 +11,7 @@

 namespace aarch64
 {
-    enum gprs : s32
+    enum gpr : s32
     {
         x0 = 0,
         x1, x2, x3, x4, x5, x6, x7, x8, x9,
@@ -21,7 +21,7 @@ namespace aarch64

     // On non-x86 architectures GHC runs stackless. SP is treated as a pointer to scratchpad memory.
     // This pass keeps this behavior intact while preserving the expectations of the host's C++ ABI.
-    class GHC_frame_preservation_pass : translator_pass
+    class GHC_frame_preservation_pass : public translator_pass
     {
     public:
         struct function_info_t
@@ -46,7 +46,7 @@ namespace aarch64
         struct
         {
-            gprs base_register;
+            gpr base_register;
             u32 hypervisor_context_offset;
         } execution_context;
@@ -60,7 +60,7 @@ namespace aarch64
     public:
         GHC_frame_preservation_pass(
-            gprs base_reg,
+            gpr base_reg,
             u32 hv_ctx_offset,
             std::function<bool(const std::string&)> exclusion_callback = {});
         ~GHC_frame_preservation_pass() = default;
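
The `public translator_pass` change is required rather than cosmetic: with `class`, a base defaults to private, and a private base blocks the derived-to-base conversion used when the pass is stored behind a `translator_pass` pointer (as the constructor registration in PPUTranslator.cpp below does). A minimal sketch of the failure mode (illustrative types, not the real headers):

    #include <memory>

    struct translator_pass
    {
        virtual ~translator_pass() = default;
    };

    class broken_pass : translator_pass {};          // private base by default
    class working_pass : public translator_pass {};  // public base

    int main()
    {
        // Fails to compile: translator_pass is an inaccessible base of broken_pass.
        // std::unique_ptr<translator_pass> p1 = std::make_unique<broken_pass>();

        std::unique_ptr<translator_pass> p2 = std::make_unique<working_pass>(); // OK
        return 0;
    }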

View file

@@ -291,6 +291,13 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
 // and https://developer.arm.com/documentation/den0024/a/The-ABI-for-ARM-64-bit-Architecture/Register-use-in-the-AArch64-Procedure-Call-Standard/Parameters-in-general-purpose-registers
 // for AArch64 calling convention

+// PPU function argument layout:
+// x19 = m_exec
+// x20 = m_thread
+// x21 = seg0
+// x22 = m_base
+// x23 - x25 = gpr[0] - gpr[2]
+
 // Push callee saved registers to the hv context
 // Assume our LLVM compiled code is unsafe and can clobber our stack. GHC on aarch64 treats stack as scratch.
 // We also want to store the register context at a fixed place so we can read the hypervisor state from any location.
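
For context, the layout above follows from LLVM's GHC calling convention on AArch64, which assigns the first integer arguments to x19, x20, ... in order, so the argument order of the translated call decides which register each value lands in. A hedged sketch of how a gateway could populate those registers, with illustrative names and no claim to match RPCS3's actual ppu_gateway assembly:

    #if defined(__aarch64__)
    // Illustrative only: move AAPCS arguments into the GHC-convention
    // registers listed above, then enter the JIT-compiled block.
    inline void enter_ppu_jit(void* exec, void* thread, void* seg0, void* base, const void* entry)
    {
        __asm__ volatile(
            "mov x19, %[exec]\n"   // x19 = m_exec
            "mov x20, %[thread]\n" // x20 = m_thread
            "mov x21, %[seg0]\n"   // x21 = seg0
            "mov x22, %[base]\n"   // x22 = m_base
            "blr %[entry]\n"       // jump into the JIT-compiled block
            :
            : [exec] "r"(exec), [thread] "r"(thread), [seg0] "r"(seg0),
              [base] "r"(base), [entry] "r"(entry)
            : "x19", "x20", "x21", "x22", "x30", "memory");
    }
    #endif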

View file

@@ -16,6 +16,10 @@
 #include <unordered_set>
 #include <span>

+#ifdef ARCH_ARM64
+#include "Emu/CPU/Backends/AArch64JIT.h"
+#endif
+
 using namespace llvm;

 const ppu_decoder<PPUTranslator> s_ppu_decoder;
@@ -30,6 +34,14 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
     // Bind context
     cpu_translator::initialize(context, engine);

+    // Initialize transform passes
+#ifdef ARCH_ARM64
+    std::unique_ptr<translator_pass> ghc_fixup_pass = std::make_unique<aarch64::GHC_frame_preservation_pass>(
+        aarch64::x20, ::offset32(&ppu_thread::hv_ctx));
+    register_transform_pass(ghc_fixup_pass);
+#endif
+
     // Thread context struct (TODO: safer member access)
     const u32 off0 = offset32(&ppu_thread::state);
     const u32 off1 = offset32(&ppu_thread::gpr);
@@ -208,7 +220,8 @@ Function* PPUTranslator::Translate(const ppu_function& info)
     m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, ptr, m_ir->getInt32((+cpu_flag::wait).operator u32()), llvm::MaybeAlign{4}, llvm::AtomicOrdering::AcquireRelease);

     // Create tail call to the check function
-    VMEscape(Call(GetType<void>(), "__check", m_thread, GetAddr()));
+    Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
+    m_ir->CreateRetVoid();
 }
 else
 {
@@ -269,7 +282,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
         }
     }

-    run_transforms(*m_function);
+    replace_intrinsics(*m_function);
     return m_function;
 }
@@ -320,8 +333,8 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
     if (vec_addrs.empty())
     {
         // Possible special case for no functions (allowing the do-while optimization)
-        m_ir->CreateRetVoid(); // FIXME: Aarch64. It should work fine as long as there is no callchain beyond this function with a ret path.
-        run_transforms(*m_function);
+        m_ir->CreateRetVoid();
+        replace_intrinsics(*m_function);
         return m_function;
     }
@@ -377,9 +390,9 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
     // Set insertion point to afterloop_block
     m_ir->SetInsertPoint(after_loop);

-    m_ir->CreateRetVoid(); // FIXME: Aarch64 - Should be ok as long as no ret-based callchain proceeds from here
-    run_transforms(*m_function);
+    m_ir->CreateRetVoid();
+    replace_intrinsics(*m_function);
     return m_function;
 }
@@ -481,8 +494,8 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
     if (_target >= u32{umax})
     {
-        auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
-        VMEscape(c);
+        Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
+        m_ir->CreateRetVoid();
         return;
     }
     else if (_target >= caddr && _target <= cend)
@@ -564,7 +577,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
     const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)});
     c->setTailCallKind(llvm::CallInst::TCK_Tail);
     c->setCallingConv(CallingConv::GHC);
-    VMEscape(c);
+    m_ir->CreateRetVoid();
 }

 Value* PPUTranslator::RegInit(Value*& local)
@@ -778,8 +791,8 @@ void PPUTranslator::TestAborted()
     m_ir->SetInsertPoint(vcheck);

     // Create tail call to the check function
-    auto c = Call(GetType<void>(), "__check", m_thread, GetAddr());
-    VMEscape(c);
+    Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
+    m_ir->CreateRetVoid();
     m_ir->SetInsertPoint(body);
 }
@@ -2205,14 +2218,16 @@ void PPUTranslator::SC(ppu_opcode_t op)
         if (index < 1024)
         {
-            auto c = Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
-            VMEscape(c, true);
+            Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
+            //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
+            m_ir->CreateRetVoid();
             return;
         }
     }

-    auto c = Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
-    VMEscape(c, true);
+    Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
+    //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
+    m_ir->CreateRetVoid();
 }

 void PPUTranslator::B(ppu_opcode_t op)
@@ -2773,9 +2788,9 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
     {
         RegStore(Trunc(GetAddr()), m_cia);
         FlushRegisters();
-
-        auto inst = Call(GetType<void>(), "__resinterp", m_thread);
-        VMEscape(inst, true);
+        Call(GetType<void>(), "__resinterp", m_thread);
+        //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
+        m_ir->CreateRetVoid();
         return;
     }
@@ -2925,9 +2940,9 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
     {
         RegStore(Trunc(GetAddr()), m_cia);
         FlushRegisters();
-
-        auto inst = Call(GetType<void>(), "__resinterp", m_thread);
-        VMEscape(inst, true);
+        Call(GetType<void>(), "__resinterp", m_thread);
+        //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
+        m_ir->CreateRetVoid();
         return;
     }
@@ -4995,8 +5010,9 @@ void PPUTranslator::FCFID(ppu_opcode_t op)

 void PPUTranslator::UNK(ppu_opcode_t op)
 {
     FlushRegisters();
-    auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
-    VMEscape(c, true);
+    Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
+    //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
+    m_ir->CreateRetVoid();
 }
@@ -5275,8 +5291,9 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)

 void PPUTranslator::Trap()
 {
-    auto c = Call(GetType<void>(), "__trap", m_thread, GetAddr());
-    VMEscape(c);
+    Call(GetType<void>(), "__trap", m_thread, GetAddr());
+    //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
+    m_ir->CreateRetVoid();
 }

 Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
@@ -5323,42 +5340,6 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
     return nullptr;
 }

-void PPUTranslator::VMEscape([[maybe_unused]] llvm::CallInst* tail_call, [[maybe_unused]] bool skip_flush)
-{
-    //if (!skip_flush)
-    {
-        // Flush
-        FlushRegisters();
-    }
-
-#ifdef ARCH_X64
-    // Optionally flag last call as a tail
-    if (tail_call)
-    {
-        tail_call->setTailCall();
-    }
-
-    // This is actually AMD64 specific but good enough for now
-    m_ir->CreateRetVoid();
-#else
-    // Validation. Make sure we're escaping from a correct context. Only guest JIT should ever go through the "escape" gate.
-    const auto bb = m_ir->GetInsertPoint();
-    const auto arg = llvm::dyn_cast<llvm::Argument>(m_thread);
-    ensure(bb->getParent()->getName().str() == arg->getParent()->getName().str());
-
-    const u32 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
-    const std::string asm_ = fmt::format(
-        "ldr x20, $0;\n"
-        "ldr x30, [x20, #%u];\n",
-        hv_register_array_offset);
-
-    LLVM_ASM(asm_, std::array{ m_thread }, "m", m_ir, m_function->getContext());
-    m_ir->CreateRetVoid();
-#endif
-}
-
 void PPUTranslator::build_interpreter()
 {
 #define BUILD_VEC_INST(i) { \
@@ -5374,8 +5355,9 @@ void PPUTranslator::build_interpreter()
     op.vb = 2; \
     op.vc = 3; \
     this->i(op); \
-    VMEscape(); \
-    run_transforms(*m_function); \
+    FlushRegisters(); \
+    m_ir->CreateRetVoid(); \
+    replace_intrinsics(*m_function); \
 }

 BUILD_VEC_INST(VADDCUW);
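
The common thread in this file: every site that used to call VMEscape now emits the escape call itself (optionally tail-flagged) followed by a bare ret void, and the architecture-specific epilogue fixup moves into the registered GHC_frame_preservation_pass, which post-processes the function. A minimal self-contained sketch of that emission pattern against the LLVM C++ API (not RPCS3 code):

    #include <llvm/IR/IRBuilder.h>
    #include <llvm/IR/LLVMContext.h>
    #include <llvm/IR/Module.h>
    #include <llvm/Support/raw_ostream.h>

    int main()
    {
        llvm::LLVMContext ctx;
        llvm::Module mod("sketch", ctx);
        llvm::IRBuilder<> ir(ctx);

        auto* void_ty = llvm::Type::getVoidTy(ctx);
        auto* fn_ty = llvm::FunctionType::get(void_ty, false);
        auto* check = llvm::Function::Create(fn_ty, llvm::Function::ExternalLinkage, "__check", mod);
        auto* block = llvm::Function::Create(fn_ty, llvm::Function::ExternalLinkage, "ppu_block", mod);

        ir.SetInsertPoint(llvm::BasicBlock::Create(ctx, "entry", block));
        llvm::CallInst* c = ir.CreateCall(check); // the escape call ("__check" here)
        c->setTailCall();                         // flagged as a tail call
        ir.CreateRetVoid();                       // plain ret; a post-process pass may rewrite it
        mod.print(llvm::outs(), nullptr);
        return 0;
    }

On x86-64 the plain ret is already correct; on AArch64 the pass rewrites the epilogue to reload x30 from the hypervisor context first, which is what the deleted VMEscape used to inline at every site.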

View file

@@ -150,9 +150,6 @@ public:
     // Emit function call
     void CallFunction(u64 target, llvm::Value* indirect = nullptr);

-    // Emit escape sequence back to hypervisor
-    void VMEscape(llvm::CallInst* tail_call = nullptr, bool skip_flush = false);
-
     // Emit state check mid-block
     void TestAborted();