mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-01 20:41:19 +12:00
Add AArch64 recompiler backend (#1556)
This commit is contained in:
parent
d13dab0fd8
commit
081ebead5f
9 changed files with 1766 additions and 7 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -18,3 +18,6 @@
|
|||
path = dependencies/imgui
|
||||
url = https://github.com/ocornut/imgui
|
||||
shallow = true
|
||||
[submodule "dependencies/xbyak_aarch64"]
|
||||
path = dependencies/xbyak_aarch64
|
||||
url = https://github.com/fujitsu/xbyak_aarch64
|
||||
|
|
|
@ -222,6 +222,10 @@ endif()
|
|||
|
||||
add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL)
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)")
|
||||
add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
find_package(ZArchive)
|
||||
if (NOT ZArchive_FOUND)
|
||||
add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL)
|
||||
|
|
1
dependencies/xbyak_aarch64
vendored
Submodule
1
dependencies/xbyak_aarch64
vendored
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 904b8923457f3ec0d6f82ea2d6832a792851194d
|
|
@ -537,6 +537,14 @@ if(APPLE)
|
|||
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)")
|
||||
target_sources(CemuCafe PRIVATE
|
||||
HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
|
||||
HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
|
||||
)
|
||||
target_link_libraries(CemuCafe PRIVATE xbyak_aarch64)
|
||||
endif()
|
||||
|
||||
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
|
||||
target_include_directories(CemuCafe PUBLIC "../")
|
||||
|
|
1693
src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
Normal file
1693
src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,18 @@
|
|||
#pragma once
|
||||
|
||||
#include "HW/Espresso/Recompiler/IML/IMLInstruction.h"
|
||||
#include "../PPCRecompiler.h"
|
||||
|
||||
// AArch64 counterpart of the x64 code generation entry point: invoked by the recompiler for a function once its IML is ready.
// Returns false on failure, in which case the caller abandons recompilation of that function (returns nullptr).
bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext);
|
||||
// NOTE(review): presumably releases `size` bytes of previously generated code starting at `code` - confirm against BackendAArch64.cpp
void PPCRecompiler_cleanupAArch64Code(void* code, size_t size);
|
||||
|
||||
// Called from PPCRecompiler_init() on AArch64 builds, in place of PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions()
void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
|
||||
|
||||
// architecture specific constants
// Layout of the physical register index space handed to the IML register allocator:
// general purpose registers come first, floating point registers follow directly after them.
|
||||
namespace IMLArchAArch64
|
||||
{
|
||||
static constexpr int PHYSREG_GPR_BASE = 0; // index of the first general purpose register
|
||||
static constexpr int PHYSREG_GPR_COUNT = 25; // size of the GPR index range; NOTE(review): presumably the remaining x-registers are reserved by the backend - confirm
|
||||
static constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_COUNT; // FPR indices start where the GPR range ends (relies on PHYSREG_GPR_BASE being 0)
|
||||
static constexpr int PHYSREG_FPR_COUNT = 31; // size of the FPR index range; NOTE(review): one less than the 32 vector registers, presumably one is kept as a temporary - confirm
|
||||
}; // namespace IMLArchAArch64
|
|
@ -702,8 +702,10 @@ void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis&
|
|||
{
|
||||
IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg);
|
||||
|
||||
#ifdef ARCH_X86_64
|
||||
// x86 specific optimizations
|
||||
IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis)
|
||||
#endif
|
||||
}
|
||||
|
||||
void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext)
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
#include "IMLRegisterAllocatorRanges.h"
|
||||
|
||||
#include "../BackendX64/BackendX64.h"
|
||||
#ifdef __aarch64__
|
||||
#include "../BackendAArch64/BackendAArch64.h"
|
||||
#endif
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
@ -127,23 +130,22 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe
|
|||
fixedRegs.listInput.clear();
|
||||
fixedRegs.listOutput.clear();
|
||||
|
||||
// code below for aarch64 has not been tested
|
||||
// The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it
|
||||
// on x86 this is used for instructions like SHL <reg>, CL where the CL register is hardwired. On AArch64 it's probably only necessary for setting up the calling convention
|
||||
cemu_assert_unimplemented();
|
||||
#ifdef 0
|
||||
if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
|
||||
{
|
||||
const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
|
||||
const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
|
||||
IMLPhysRegisterSet volatileRegs;
|
||||
for (int i=0; i<19; i++) // x0 to x18 are volatile
|
||||
for (int i = 0; i <= 17; i++) // x0 to x17 are volatile
|
||||
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i);
|
||||
for (int i = 0; i <= 31; i++) // which float registers are volatile?
|
||||
// v0-v7 & v16-v31 are volatile. For v8-v15 only the high 64 bits are volatile.
|
||||
for (int i = 0; i <= 7; i++)
|
||||
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
|
||||
for (int i = 16; i <= 31; i++)
|
||||
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
|
||||
SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
// x86-64
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
#include "IML/IML.h"
|
||||
#include "IML/IMLRegisterAllocator.h"
|
||||
#include "BackendX64/BackendX64.h"
|
||||
#ifdef __aarch64__
|
||||
#include "BackendAArch64/BackendAArch64.h"
|
||||
#endif
|
||||
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||
|
||||
#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock
|
||||
|
@ -220,12 +223,20 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
// emit x64 code
|
||||
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
|
||||
if (x64GenerationSuccess == false)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
bool aarch64GenerationSuccess = PPCRecompiler_generateAArch64Code(ppcRecFunc, &ppcImlGenContext);
|
||||
if (aarch64GenerationSuccess == false)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
if (ActiveSettings::DumpRecompilerFunctionsEnabled())
|
||||
{
|
||||
FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath(fmt::format("dump/recompiler/ppc_{:08x}.bin", ppcRecFunc->ppcAddress)));
|
||||
|
@ -270,6 +281,7 @@ void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenCont
|
|||
for (auto& it : ppcImlGenContext.mappedRegs)
|
||||
raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first);
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
|
||||
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
|
||||
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
|
||||
|
@ -301,6 +313,19 @@ void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenCont
|
|||
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12);
|
||||
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13);
|
||||
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14);
|
||||
#elif defined(__aarch64__)
|
||||
auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
|
||||
for (auto i = IMLArchAArch64::PHYSREG_GPR_BASE; i < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT; i++)
|
||||
{
|
||||
if (i == IMLArchAArch64::PHYSREG_GPR_BASE + 18)
|
||||
continue; // Skip reserved platform register
|
||||
gprPhysPool.SetAvailable(i);
|
||||
}
|
||||
|
||||
auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64);
|
||||
for (auto i = IMLArchAArch64::PHYSREG_FPR_BASE; i < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT; i++)
|
||||
fprPhysPool.SetAvailable(i);
|
||||
#endif
|
||||
|
||||
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
|
||||
}
|
||||
|
@ -679,8 +704,11 @@ void PPCRecompiler_init()
|
|||
debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
|
||||
ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
|
||||
MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
|
||||
#ifdef ARCH_X86_64
|
||||
PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
|
||||
#endif
|
||||
PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter
|
||||
PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
|
||||
PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue