mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-14 10:48:36 +12:00
PPU LLVM arm64+macOS port (#12115)
* BufferUtils: use naive function pointer on Apple arm64

  Use naive function pointer on Apple arm64 because ASLR breaks asmjit. See BufferUtils.cpp comment for explanation on why this happens and how to fix if you want to use asmjit.

* build-macos: fix source maps for Mac

  Tell Qt not to strip debug symbols when we're in debug or relwithdebinfo modes.

* LLVM PPU: fix aarch64 on macOS

  Force MachO on macOS to fix LLVM being unable to patch relocations during codegen. Adds Aarch64 NEON intrinsics for x86 intrinsics used by PPUTranslator/Recompiler.

* virtual memory: use 16k pages on aarch64 macOS

  Temporary hack to get things working by using 16k pages instead of 4k pages in VM emulation.

* PPU/SPU: fix NEON intrinsics and compilation for arm64 macOS

  Fixes some intrinsics usage and patches usages of asmjit to properly emit absolute jmps so ASLR doesn't cause out of bounds rel jumps. Also patches the SPU recompiler to properly work on arm64 by telling LLVM to target arm64.

* virtual memory: fix W^X toggles on macOS aarch64

  Fixes W^X on macOS aarch64 by setting all JIT mmap'd regions to default to RW mode. For both SPU and PPU execution threads, when initialization finishes we toggle to RX mode. This exploits Apple's per-thread setting for RW/RX to let us be technically compliant with the OS's W^X enforcement while not needing to actually separate the memory allocated for code/data.

* PPU: implement aarch64 specific functions

  Implements ppu_gateway for arm64 and patches LLVM initialization to use the correct triple. Adds some fixes for macOS W^X JIT restrictions when entering/exiting JITed code.

* PPU: Mark rpcs3 calls as non-tail

  Strictly speaking, rpcs3 JIT -> C++ calls are not tail calls. If you call a function inside e.g. an L2 syscall, it will clobber LR on arm64 and subtly break returns in emulated code. Only JIT -> JIT "calls" should be tail.
* macOS/arm64: compatibility fixes

* vm: patch virtual memory for arm64 macOS

  Tag mmap calls with MAP_JIT to allow W^X on macOS. Fix mmap calls to existing mmap'd addresses that were tagged with MAP_JIT on macOS. Fix memory unmapping on 16K page machines with a hack to mark "unmapped" pages as RW.

* PPU: remove wrong comment

* PPU: fix a merge regression

* vm: remove 16k page hacks

* PPU: formatting fixes

* PPU: fix arm64 null function assembly

* ppu: clean up arch-specific instructions
This commit is contained in:
parent
264253757c
commit
cefc37a553
14 changed files with 306 additions and 16 deletions
|
@ -34,6 +34,15 @@ const extern spu_decoder<spu_iflag> g_spu_iflag;
|
|||
// Move 4 args for calling native function from a GHC calling convention function
|
||||
static u8* move_args_ghc_to_native(u8* raw)
|
||||
{
|
||||
#ifdef ARCH_ARM64
|
||||
// Note: this is a placeholder to get rpcs3 working for now
|
||||
// mov x0, x22
|
||||
// mov x1, x23
|
||||
// mov x2, x24
|
||||
// mov x3, x25
|
||||
std::memcpy(raw, "\xE0\x03\x16\xAA\xE1\x03\x17\xAA\xE2\x03\x18\xAA\xE3\x03\x19\xAA", 16);
|
||||
return raw + 16;
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
// mov rcx, r13
|
||||
// mov rdx, rbp
|
||||
|
@ -49,10 +58,14 @@ static u8* move_args_ghc_to_native(u8* raw)
|
|||
#endif
|
||||
|
||||
return raw + 12;
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLARE(spu_runtime::tr_dispatch) = []
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
pthread_jit_write_protect_np(false);
|
||||
#endif
|
||||
// Generate a special trampoline to spu_recompiler_base::dispatch with pause instruction
|
||||
u8* const trptr = jit_runtime::alloc(32, 16);
|
||||
u8* raw = move_args_ghc_to_native(trptr);
|
||||
|
@ -439,6 +452,9 @@ void spu_cache::initialize()
|
|||
|
||||
named_thread_group workers("SPU Worker ", worker_count, [&]() -> uint
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
pthread_jit_write_protect_np(false);
|
||||
#endif
|
||||
// Set low priority
|
||||
thread_ctrl::scoped_priority low_prio(-1);
|
||||
|
||||
|
@ -4412,7 +4428,7 @@ public:
|
|||
|
||||
// Create LLVM module
|
||||
std::unique_ptr<Module> _module = std::make_unique<Module>(m_hash + ".obj", m_context);
|
||||
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||
_module->setTargetTriple(utils::c_llvm_default_triple);
|
||||
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
||||
m_module = _module.get();
|
||||
|
||||
|
@ -4672,7 +4688,12 @@ public:
|
|||
// Function that executes check_state and escapes if necessary
|
||||
m_test_state = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("spu_test_state", get_ftype<void, u8*>()).getCallee());
|
||||
m_test_state->setLinkage(GlobalValue::InternalLinkage);
|
||||
#ifdef ARCH_ARM64
|
||||
// LLVM doesn't support PreserveAll on arm64.
|
||||
m_test_state->setCallingConv(CallingConv::GHC);
|
||||
#else
|
||||
m_test_state->setCallingConv(CallingConv::PreserveAll);
|
||||
#endif
|
||||
m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", m_test_state));
|
||||
const auto escape_yes = BasicBlock::Create(m_context, "", m_test_state);
|
||||
const auto escape_no = BasicBlock::Create(m_context, "", m_test_state);
|
||||
|
@ -5069,7 +5090,7 @@ public:
|
|||
|
||||
// Create LLVM module
|
||||
std::unique_ptr<Module> _module = std::make_unique<Module>("spu_interpreter.obj", m_context);
|
||||
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||
_module->setTargetTriple(utils::c_llvm_default_triple);
|
||||
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
||||
m_module = _module.get();
|
||||
|
||||
|
@ -5114,7 +5135,11 @@ public:
|
|||
|
||||
// Save host thread's stack pointer
|
||||
const auto native_sp = spu_ptr<u64>(&spu_thread::saved_native_sp);
|
||||
#if defined(ARCH_X64)
|
||||
const auto rsp_name = MetadataAsValue::get(m_context, MDNode::get(m_context, {MDString::get(m_context, "rsp")}));
|
||||
#elif defined(ARCH_ARM64)
|
||||
const auto rsp_name = MetadataAsValue::get(m_context, MDNode::get(m_context, {MDString::get(m_context, "sp")}));
|
||||
#endif
|
||||
m_ir->CreateStore(m_ir->CreateCall(get_intrinsic<u64>(Intrinsic::read_register), {rsp_name}), native_sp);
|
||||
|
||||
// Decode (shift) and load function pointer
|
||||
|
@ -5328,7 +5353,11 @@ public:
|
|||
else if (!(itype & spu_itype::branch))
|
||||
{
|
||||
// Hack: inline ret instruction before final jmp; this is not reliable.
|
||||
#ifdef ARCH_X64
|
||||
m_ir->CreateCall(InlineAsm::get(get_ftype<void>(), "ret", "", true, false, InlineAsm::AD_Intel));
|
||||
#else
|
||||
m_ir->CreateCall(InlineAsm::get(get_ftype<void>(), "ret", "", true, false));
|
||||
#endif
|
||||
fret = ret_func;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue