mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-02 04:51:19 +12:00
Add AArch64 recompiler backend (#1556)
This commit is contained in:
parent
d13dab0fd8
commit
081ebead5f
9 changed files with 1766 additions and 7 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -18,3 +18,6 @@
|
||||||
path = dependencies/imgui
|
path = dependencies/imgui
|
||||||
url = https://github.com/ocornut/imgui
|
url = https://github.com/ocornut/imgui
|
||||||
shallow = true
|
shallow = true
|
||||||
|
[submodule "dependencies/xbyak_aarch64"]
|
||||||
|
path = dependencies/xbyak_aarch64
|
||||||
|
url = https://github.com/fujitsu/xbyak_aarch64
|
||||||
|
|
|
@ -222,6 +222,10 @@ endif()
|
||||||
|
|
||||||
add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL)
|
add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL)
|
||||||
|
|
||||||
|
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)")
|
||||||
|
add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL)
|
||||||
|
endif()
|
||||||
|
|
||||||
find_package(ZArchive)
|
find_package(ZArchive)
|
||||||
if (NOT ZArchive_FOUND)
|
if (NOT ZArchive_FOUND)
|
||||||
add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL)
|
add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL)
|
||||||
|
|
1
dependencies/xbyak_aarch64
vendored
Submodule
1
dependencies/xbyak_aarch64
vendored
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 904b8923457f3ec0d6f82ea2d6832a792851194d
|
|
@ -537,6 +537,14 @@ if(APPLE)
|
||||||
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
|
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)")
|
||||||
|
target_sources(CemuCafe PRIVATE
|
||||||
|
HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
|
||||||
|
HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
|
||||||
|
)
|
||||||
|
target_link_libraries(CemuCafe PRIVATE xbyak_aarch64)
|
||||||
|
endif()
|
||||||
|
|
||||||
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||||
|
|
||||||
target_include_directories(CemuCafe PUBLIC "../")
|
target_include_directories(CemuCafe PUBLIC "../")
|
||||||
|
|
1693
src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
Normal file
1693
src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,18 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "HW/Espresso/Recompiler/IML/IMLInstruction.h"
|
||||||
|
#include "../PPCRecompiler.h"
|
||||||
|
|
||||||
|
bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext);
|
||||||
|
void PPCRecompiler_cleanupAArch64Code(void* code, size_t size);
|
||||||
|
|
||||||
|
void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
|
||||||
|
|
||||||
|
// architecture specific constants
|
||||||
|
namespace IMLArchAArch64
|
||||||
|
{
|
||||||
|
static constexpr int PHYSREG_GPR_BASE = 0;
|
||||||
|
static constexpr int PHYSREG_GPR_COUNT = 25;
|
||||||
|
static constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_COUNT;
|
||||||
|
static constexpr int PHYSREG_FPR_COUNT = 31;
|
||||||
|
}; // namespace IMLArchAArch64
|
|
@ -702,8 +702,10 @@ void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis&
|
||||||
{
|
{
|
||||||
IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg);
|
IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg);
|
||||||
|
|
||||||
|
#ifdef ARCH_X86_64
|
||||||
// x86 specific optimizations
|
// x86 specific optimizations
|
||||||
IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis)
|
IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis)
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext)
|
void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext)
|
||||||
|
|
|
@ -6,6 +6,9 @@
|
||||||
#include "IMLRegisterAllocatorRanges.h"
|
#include "IMLRegisterAllocatorRanges.h"
|
||||||
|
|
||||||
#include "../BackendX64/BackendX64.h"
|
#include "../BackendX64/BackendX64.h"
|
||||||
|
#ifdef __aarch64__
|
||||||
|
#include "../BackendAArch64/BackendAArch64.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
|
@ -127,23 +130,22 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe
|
||||||
fixedRegs.listInput.clear();
|
fixedRegs.listInput.clear();
|
||||||
fixedRegs.listOutput.clear();
|
fixedRegs.listOutput.clear();
|
||||||
|
|
||||||
// code below for aarch64 has not been tested
|
|
||||||
// The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it
|
// The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it
|
||||||
// on x86 this is used for instructions like SHL <reg>, CL where the CL register is hardwired. On AArch64 it's probably only necessary for setting up the calling convention
|
// on x86 this is used for instructions like SHL <reg>, CL where the CL register is hardwired. On AArch64 it's probably only necessary for setting up the calling convention
|
||||||
cemu_assert_unimplemented();
|
|
||||||
#ifdef 0
|
|
||||||
if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
|
if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
|
||||||
{
|
{
|
||||||
const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
|
const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
|
||||||
const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
|
const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
|
||||||
IMLPhysRegisterSet volatileRegs;
|
IMLPhysRegisterSet volatileRegs;
|
||||||
for (int i=0; i<19; i++) // x0 to x18 are volatile
|
for (int i = 0; i <= 17; i++) // x0 to x17 are volatile
|
||||||
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i);
|
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i);
|
||||||
for (int i = 0; i <= 31; i++) // which float registers are volatile?
|
// v0-v7 & v16-v31 are volatile. For v8-v15 only the high 64 bits are volatile.
|
||||||
|
for (int i = 0; i <= 7; i++)
|
||||||
|
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
|
||||||
|
for (int i = 16; i <= 31; i++)
|
||||||
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
|
volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
|
||||||
SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
|
SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// x86-64
|
// x86-64
|
||||||
|
|
|
@ -16,6 +16,9 @@
|
||||||
#include "IML/IML.h"
|
#include "IML/IML.h"
|
||||||
#include "IML/IMLRegisterAllocator.h"
|
#include "IML/IMLRegisterAllocator.h"
|
||||||
#include "BackendX64/BackendX64.h"
|
#include "BackendX64/BackendX64.h"
|
||||||
|
#ifdef __aarch64__
|
||||||
|
#include "BackendAArch64/BackendAArch64.h"
|
||||||
|
#endif
|
||||||
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||||
|
|
||||||
#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock
|
#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock
|
||||||
|
@ -220,12 +223,20 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(ARCH_X86_64)
|
||||||
// emit x64 code
|
// emit x64 code
|
||||||
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
|
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
|
||||||
if (x64GenerationSuccess == false)
|
if (x64GenerationSuccess == false)
|
||||||
{
|
{
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
bool aarch64GenerationSuccess = PPCRecompiler_generateAArch64Code(ppcRecFunc, &ppcImlGenContext);
|
||||||
|
if (aarch64GenerationSuccess == false)
|
||||||
|
{
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (ActiveSettings::DumpRecompilerFunctionsEnabled())
|
if (ActiveSettings::DumpRecompilerFunctionsEnabled())
|
||||||
{
|
{
|
||||||
FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath(fmt::format("dump/recompiler/ppc_{:08x}.bin", ppcRecFunc->ppcAddress)));
|
FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath(fmt::format("dump/recompiler/ppc_{:08x}.bin", ppcRecFunc->ppcAddress)));
|
||||||
|
@ -270,6 +281,7 @@ void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenCont
|
||||||
for (auto& it : ppcImlGenContext.mappedRegs)
|
for (auto& it : ppcImlGenContext.mappedRegs)
|
||||||
raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first);
|
raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first);
|
||||||
|
|
||||||
|
#if defined(ARCH_X86_64)
|
||||||
auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
|
auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
|
||||||
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
|
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
|
||||||
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
|
gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
|
||||||
|
@ -301,6 +313,19 @@ void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenCont
|
||||||
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12);
|
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12);
|
||||||
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13);
|
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13);
|
||||||
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14);
|
fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14);
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64);
|
||||||
|
for (auto i = IMLArchAArch64::PHYSREG_GPR_BASE; i < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT; i++)
|
||||||
|
{
|
||||||
|
if (i == IMLArchAArch64::PHYSREG_GPR_BASE + 18)
|
||||||
|
continue; // Skip reserved platform register
|
||||||
|
gprPhysPool.SetAvailable(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64);
|
||||||
|
for (auto i = IMLArchAArch64::PHYSREG_FPR_BASE; i < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT; i++)
|
||||||
|
fprPhysPool.SetAvailable(i);
|
||||||
|
#endif
|
||||||
|
|
||||||
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
|
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
|
||||||
}
|
}
|
||||||
|
@ -679,8 +704,11 @@ void PPCRecompiler_init()
|
||||||
debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
|
debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024));
|
||||||
ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
|
ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW);
|
||||||
MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
|
MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true);
|
||||||
|
#ifdef ARCH_X86_64
|
||||||
PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
|
PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions();
|
||||||
|
#endif
|
||||||
PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter
|
PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter
|
||||||
PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
|
PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
|
||||||
PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
|
PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue