mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-06 23:11:18 +12:00
PPCRec: Emit x86 movd for non-AVX + more restructuring
This commit is contained in:
parent
411a83799c
commit
da08eda506
13 changed files with 589 additions and 584 deletions
|
@ -14,6 +14,8 @@
|
|||
#include "util/helpers/helpers.h"
|
||||
#include "util/MemMapper/MemMapper.h"
|
||||
|
||||
#include "Cafe/HW/Espresso/Recompiler/IML/IML.h"
|
||||
|
||||
struct PPCInvalidationRange
|
||||
{
|
||||
MPTR startAddress;
|
||||
|
@ -127,6 +129,7 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
|
|||
PPCRecompiler_enter(hCPU, funcPtr);
|
||||
}
|
||||
}
|
||||
bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext);
|
||||
|
||||
PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut)
|
||||
{
|
||||
|
@ -153,21 +156,27 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
|
|||
PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
|
||||
ppcRecFunc->ppcAddress = range.startAddress;
|
||||
ppcRecFunc->ppcSize = range.length;
|
||||
|
||||
// generate intermediate code
|
||||
ppcImlGenContext_t ppcImlGenContext = { 0 };
|
||||
bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
|
||||
if (compiledSuccessfully == false)
|
||||
{
|
||||
// todo: Free everything
|
||||
PPCRecompiler_freeContext(&ppcImlGenContext);
|
||||
delete ppcRecFunc;
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// apply passes
|
||||
if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext))
|
||||
{
|
||||
delete ppcRecFunc;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// emit x64 code
|
||||
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
|
||||
if (x64GenerationSuccess == false)
|
||||
{
|
||||
PPCRecompiler_freeContext(&ppcImlGenContext);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -183,11 +192,82 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
|
|||
|
||||
entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
|
||||
}
|
||||
|
||||
PPCRecompiler_freeContext(&ppcImlGenContext);
|
||||
return ppcRecFunc;
|
||||
}
|
||||
|
||||
void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext);
|
||||
|
||||
bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
|
||||
{
|
||||
PPCRecompiler_FixLoops(ppcImlGenContext);
|
||||
|
||||
// isolate entry points from function flow (enterable segments must not be the target of any other segment)
|
||||
// this simplifies logic during register allocation
|
||||
PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);
|
||||
|
||||
// if GQRs can be predicted, optimize PSQ load/stores
|
||||
PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext);
|
||||
|
||||
// count number of used registers
|
||||
uint32 numLoadedFPRRegisters = 0;
|
||||
for (uint32 i = 0; i < 255; i++)
|
||||
{
|
||||
if (ppcImlGenContext.mappedFPRRegister[i])
|
||||
numLoadedFPRRegisters++;
|
||||
}
|
||||
|
||||
// insert name store instructions at the end of each segment but before branch instructions
|
||||
for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
if (segIt->imlList.size() == 0)
|
||||
continue; // ignore empty segments
|
||||
// analyze segment for register usage
|
||||
IMLUsedRegisters registersUsed;
|
||||
for (sint32 i = 0; i < segIt->imlList.size(); i++)
|
||||
{
|
||||
segIt->imlList[i].CheckRegisterUsage(®istersUsed);
|
||||
sint32 accessedTempReg[5];
|
||||
// intermediate FPRs
|
||||
accessedTempReg[0] = registersUsed.readFPR1;
|
||||
accessedTempReg[1] = registersUsed.readFPR2;
|
||||
accessedTempReg[2] = registersUsed.readFPR3;
|
||||
accessedTempReg[3] = registersUsed.readFPR4;
|
||||
accessedTempReg[4] = registersUsed.writtenFPR1;
|
||||
for (sint32 f = 0; f < 5; f++)
|
||||
{
|
||||
if (accessedTempReg[f] == -1)
|
||||
continue;
|
||||
uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]];
|
||||
if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32)
|
||||
{
|
||||
segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// merge certain float load+store patterns (must happen before FPR register remapping)
|
||||
PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext);
|
||||
// delay byte swapping for certain load+store patterns
|
||||
PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext);
|
||||
|
||||
if (numLoadedFPRRegisters > 0)
|
||||
{
|
||||
if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext);
|
||||
|
||||
// remove redundant name load and store instructions
|
||||
PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
|
||||
PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector<std::pair<MPTR, uint32>>& entryPoints)
|
||||
{
|
||||
// update jump table
|
||||
|
@ -511,42 +591,6 @@ void PPCRecompiler_init()
|
|||
PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
|
||||
PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
|
||||
|
||||
// init x64 recompiler instance data
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL);
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL);
|
||||
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL);
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31);
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF;
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF;
|
||||
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF;
|
||||
ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f;
|
||||
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f;
|
||||
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000;
|
||||
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
|
||||
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;
|
||||
|
||||
// setup GQR scale tables
|
||||
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue