// Cemu/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
#include "IML.h"
#include "../PPCRecompiler.h"
#include "../PPCRecompilerIml.h"
#include "IMLRegisterAllocator.h"
#include "IMLRegisterAllocatorRanges.h"
#include "../BackendX64/BackendX64.h"
#include <boost/container/static_vector.hpp>
#include <boost/container/small_vector.hpp>
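// High-level flow of this register allocator (as implemented in this file):
//  1) IMLRA_CalculateLivenessRanges - scan each segment and record a coarse
//     [usageStart, usageEnd) window per register (IMLRARegAbstractLiveness)
//  2) IMLRA_MergeCloseAbstractRanges - extend and connect windows across segment
//     boundaries when the next use is close, so short gaps between uses do not
//     force a spill and reload
//  3) PPCRecRA_convertToMappedRanges - turn the abstract windows into
//     raLivenessRange objects linked across segments (range clusters)
//  4) IMLRA_HandleFixedRegisters / IMLRA_AssignRegisters - assign physical
//     registers, splitting or exploding ranges via spill strategies on conflicts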
struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment
{
IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {};
void TrackInstruction(sint32 index)
{
usageStart = std::min<sint32>(usageStart, index);
usageEnd = std::max<sint32>(usageEnd, index + 1); // exclusive index
}
sint32 usageStart;
sint32 usageEnd;
bool isProcessed{false};
IMLRegFormat regBaseFormat;
};
struct IMLRegisterAllocatorContext
{
IMLRegisterAllocatorParameters* raParam;
ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec
std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat; // a vector would be more efficient, but it would require reg ids to be contiguous rather than completely arbitrary
// first pass
std::vector<std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>> perSegmentAbstractRanges;
// second pass
// helper methods
inline std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>& GetSegmentAbstractRangeMap(IMLSegment* imlSegment)
{
return perSegmentAbstractRanges[imlSegment->momentaryIndex];
}
inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const
{
auto it = regIdToBaseFormat.find(regId);
cemu_assert_debug(it != regIdToBaseFormat.cend());
return it->second;
}
};
struct IMLFixedRegisters
{
struct Entry
{
Entry(IMLReg reg, IMLPhysRegisterSet physRegSet) : reg(reg), physRegSet(physRegSet) {}
IMLReg reg;
IMLPhysRegisterSet physRegSet;
};
boost::container::small_vector<Entry, 4> listInput; // fixed registers for instruction input edge
boost::container::small_vector<Entry, 4> listOutput; // fixed registers for instruction output edge
};
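// Collects the fixed (architecture-imposed) register constraints of a single
// instruction. The x86-64 rules encoded below: variable shift counts must sit
// in ECX, ATOMIC_CMP_STORE pins a register to EAX (likely because x86 CMPXCHG
// implicitly uses EAX), and CALL_IMM returns integer results in EAX while the
// volatile registers of the Microsoft x64 calling convention (RAX, RCX, RDX,
// R8-R11, XMM0-XMM5) are blocked across the call via an IMLREG_INVALID entry.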
static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
{
fixedRegs.listInput.clear();
fixedRegs.listOutput.clear();
// x86 specific logic is hardcoded for now
if(instruction->type == PPCREC_IML_TYPE_R_R_R)
{
if(instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
{
// todo: We can skip this if g_CPUFeatures.x86.bmi2 is set, but for now we just assume it's not so we can properly test increased register pressure
IMLPhysRegisterSet ps;
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_ECX);
fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps);
}
}
else if(instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
IMLPhysRegisterSet ps;
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX);
fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps);
}
else if(instruction->type == PPCREC_IML_TYPE_CALL_IMM)
{
// parameters (todo)
cemu_assert_debug(!instruction->op_call_imm.regParam0.IsValid());
cemu_assert_debug(!instruction->op_call_imm.regParam1.IsValid());
cemu_assert_debug(!instruction->op_call_imm.regParam2.IsValid());
// return value
if(instruction->op_call_imm.regReturn.IsValid())
{
IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat();
bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8;
cemu_assert_debug(isIntegerFormat); // float return values are still todo
IMLPhysRegisterSet ps;
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX);
fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps);
}
// block volatile registers from being used on the output edge; this forces the register allocator to spill values held in them across the call
IMLPhysRegisterSet ps;
if(!instruction->op_call_imm.regReturn.IsValid())
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RAX);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RCX);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RDX);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R8);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R9);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R10);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R11);
for(int i=0; i<=5; i++)
ps.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE+i); // YMM0-YMM5 are volatile
// for YMM6-YMM15 only the upper 128 bits are volatile, which we don't use
fixedRegs.listOutput.emplace_back(IMLREG_INVALID, ps);
}
}
uint32 PPCRecRA_getNextIterationIndex()
{
static uint32 recRACurrentIterationIndex = 0;
recRACurrentIterationIndex++;
return recRACurrentIterationIndex;
}
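// Depth-limited forward DFS used for loop detection: starting from the loop
// base's branch-taken successor, follow only forward edges (increasing
// momentaryIndex) and report whether the walk can reach imlSegmentLoopBase
// again. iterationIndex guards against revisiting segments; every segment on a
// confirmed loop path gets its loopDepth incremented, so hot segments are
// allocated first (see IMLRA_AssignRegisters).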
bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase)
{
if (currentSegment == imlSegmentLoopBase)
return true;
if (currentSegment->raInfo.lastIterationIndex == iterationIndex)
return currentSegment->raInfo.isPartOfProcessedLoop;
if (depth >= 9)
return false;
currentSegment->raInfo.lastIterationIndex = iterationIndex;
currentSegment->raInfo.isPartOfProcessedLoop = false;
if (currentSegment->nextSegmentIsUncertain)
return false;
if (currentSegment->nextSegmentBranchNotTaken)
{
if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex)
{
currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
}
}
if (currentSegment->nextSegmentBranchTaken)
{
if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex)
{
currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase);
}
}
if (currentSegment->raInfo.isPartOfProcessedLoop)
currentSegment->loopDepth++;
return currentSegment->raInfo.isPartOfProcessedLoop;
}
void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase)
{
uint32 iterationIndex = PPCRecRA_getNextIterationIndex();
imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex;
if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase))
{
imlSegmentLoopBase->loopDepth++;
}
}
void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
if (imlSegment->nextSegmentIsUncertain)
return;
// check if this segment has a branch that links to itself (tight loop)
if (imlSegment->nextSegmentBranchTaken == imlSegment)
{
// segment loops over itself
imlSegment->loopDepth++;
return;
}
// check if this segment has a branch that goes backwards (potential complex loop)
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex)
{
PPCRecRA_detectLoop(ppcImlGenContext, imlSegment);
}
}
#define SUBRANGE_LIST_SIZE (128)
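// Positions within a segment are measured in raInstructionEdge units: each
// instruction index i contributes an input (read) edge at raw position 2*i and
// an output (write) edge at 2*i+1 (see the index*2 / preciseIndex++ logic
// below). Example: for instruction 3 a read sits at raw position 6 and a write
// at 7; the distances returned by the helpers below are differences of these
// raw edge positions. The special raw values RA_INTER_RANGE_START and
// RA_INTER_RANGE_END mark positions that connect to the previous/next segment.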
sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition)
{
sint32 startInstructionIndex;
if(startPosition.ConnectsToPreviousSegment())
startInstructionIndex = 0;
else
startInstructionIndex = startPosition.GetInstructionIndex();
for (sint32 i = 0; i < subrange->list_locations.size(); i++)
{
if (subrange->list_locations[i].index >= startInstructionIndex)
{
sint32 preciseIndex = subrange->list_locations[i].index * 2;
cemu_assert_debug(subrange->list_locations[i].isRead || subrange->list_locations[i].isWrite); // every location must have at least one access type set
// check read edge
if(subrange->list_locations[i].isRead)
{
if(preciseIndex >= startPosition.GetRaw())
return preciseIndex - startPosition.GetRaw();
}
// check write edge
if(subrange->list_locations[i].isWrite)
{
preciseIndex++;
if(preciseIndex >= startPosition.GetRaw())
return preciseIndex - startPosition.GetRaw();
}
}
}
cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
return 10001*2;
}
// returns -1 if there is no fixed register requirement on or after startPosition
sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess)
{
hasFixedAccess = false;
cemu_assert_debug(startPosition.IsInstructionIndex());
for(auto& fixedReqEntry : range->list_fixedRegRequirements)
{
if(fixedReqEntry.pos < startPosition)
continue;
if(fixedReqEntry.allowedReg.IsAvailable(physRegister))
{
hasFixedAccess = true;
return fixedReqEntry.pos.GetRaw() - startPosition.GetRaw();
}
}
cemu_assert_debug(range->interval2.end.IsInstructionIndex());
return range->interval2.end.GetRaw() - startPosition.GetRaw();
}
sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister)
{
cemu_assert_debug(startPosition.IsInstructionIndex());
raInstructionEdge lastPos2;
lastPos2.Set(imlSegment->imlList.size(), false);
raInstructionEdge endPos;
endPos = startPosition + maxDistance;
if(endPos > lastPos2)
endPos = lastPos2;
IMLFixedRegisters fixedRegs;
if(startPosition.IsOnOutputEdge())
GetInstructionFixedRegisters(imlSegment->imlList.data()+startPosition.GetInstructionIndex(), fixedRegs);
for(raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos)
{
if(currentPos.IsOnInputEdge())
{
GetInstructionFixedRegisters(imlSegment->imlList.data()+currentPos.GetInstructionIndex(), fixedRegs);
}
auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
for(auto& fixedRegLoc : fixedRegAccess)
{
if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
{
cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this function only makes sense when each requirement pins exactly one register, otherwise there are extra permutations to consider; IMLREG_INVALID entries (reserved registers) are the exception
if(fixedRegLoc.physRegSet.IsAvailable(physRegister))
return currentPos.GetRaw() - startPosition.GetRaw();
}
}
}
return endPos.GetRaw() - startPosition.GetRaw();
}
// count how many instructions there are until physRegister is used by any subrange or reserved for any fixed register requirement (returns 0 if register is in use at startIndex)
sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister)
{
cemu_assert_debug(startPosition.IsInstructionIndex());
sint32 minDistance = (sint32)imlSegment->imlList.size()*2 - startPosition.GetRaw();
// next
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr)
{
if (subrangeItr->GetPhysicalRegister() != physRegister)
{
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
if(subrangeItr->interval2.ContainsEdge(startPosition))
return 0;
if (subrangeItr->interval2.end < startPosition)
{
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
cemu_assert_debug(startPosition <= subrangeItr->interval2.start);
sint32 currentDist = subrangeItr->interval2.start.GetRaw() - startPosition.GetRaw();
minDistance = std::min(minDistance, currentDist);
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
return minDistance;
}
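// Linear-scan style bookkeeping: activeRanges holds the ranges that are live at
// the current sweep position, ExpireRanges() retires everything that ends
// before it (swap-with-last removal, so order is not preserved), and the
// retired ranges remain queryable through GetExpiredRanges() until the next
// call.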
struct IMLRALivenessTimeline
{
IMLRALivenessTimeline()
{
}
// manually add an active range
void AddActiveRange(raLivenessRange* subrange)
{
activeRanges.emplace_back(subrange);
}
// remove all ranges from activeRanges that end before instructionIndex (deprecated variant, see the raInstructionEdge overload below)
void ExpireRanges(sint32 instructionIndex)
{
__debugbreak(); // maybe replace calls with raInstructionEdge variant?
expiredRanges.clear();
size_t count = activeRanges.size();
for (size_t f = 0; f < count; f++)
{
raLivenessRange* liverange = activeRanges[f];
if (liverange->interval2.end.GetInstructionIndex() < instructionIndex) // end is inclusive now, so compare with < instead of the old <=
{
#ifdef CEMU_DEBUG_ASSERT
if (instructionIndex != RA_INTER_RANGE_END && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
assert_dbg(); // infinite subranges should not expire
#endif
expiredRanges.emplace_back(liverange);
// remove entry
activeRanges[f] = activeRanges[count-1];
f--;
count--;
}
}
if(count != activeRanges.size())
activeRanges.resize(count);
}
void ExpireRanges(raInstructionEdge expireUpTo)
{
expiredRanges.clear();
size_t count = activeRanges.size();
for (size_t f = 0; f < count; f++)
{
raLivenessRange* liverange = activeRanges[f];
if (liverange->interval2.end < expireUpTo) // end is inclusive, so a range expires once it ends strictly before expireUpTo
{
#ifdef CEMU_DEBUG_ASSERT
if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
assert_dbg(); // infinite subranges should not expire
#endif
expiredRanges.emplace_back(liverange);
// remove entry
activeRanges[f] = activeRanges[count-1];
f--;
count--;
}
}
if(count != activeRanges.size())
activeRanges.resize(count);
}
std::span<raLivenessRange*> GetExpiredRanges()
{
return { expiredRanges.data(), expiredRanges.size() };
}
std::span<raLivenessRange*> GetActiveRanges()
{
return { activeRanges.data(), activeRanges.size() };
}
raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId)
{
for(auto& it : activeRanges)
if(it->virtualRegister == regId)
return it;
return nullptr;
}
raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg)
{
cemu_assert_debug(physReg >= 0);
for(auto& it : activeRanges)
if(it->physicalRegister == physReg)
return it;
return nullptr;
}
boost::container::small_vector<raLivenessRange*, 64> activeRanges;
private:
boost::container::small_vector<raLivenessRange*, 16> expiredRanges;
};
// mark occupied registers by any overlapping range as unavailable in physRegSet
void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet)
{
auto clusterRanges = range2->GetAllSubrangesInCluster();
for (auto& subrange : clusterRanges)
{
IMLSegment* imlSegment = subrange->imlSegment;
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr)
{
if (subrange == subrangeItr)
{
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
if(subrange->interval2.IsOverlapping(subrangeItr->interval2))
{
if (subrangeItr->GetPhysicalRegister() >= 0)
physRegSet.SetReserved(subrangeItr->GetPhysicalRegister());
}
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
}
}
bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->interval2.start < rhs->interval2.start; }
void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
{
raLivenessRange* subrangeList[4096+1];
sint32 count = 0;
// disassemble linked list
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
if (count >= 4096)
assert_dbg();
subrangeList[count] = subrangeItr;
count++;
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
if (count == 0)
{
imlSegment->raInfo.linkedList_allSubranges = nullptr;
return;
}
// sort
std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare);
// reassemble linked list
subrangeList[count] = nullptr;
imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
subrangeList[0]->link_allSegmentRanges.prev = nullptr;
subrangeList[0]->link_allSegmentRanges.next = subrangeList[1];
for (sint32 i = 1; i < count; i++)
{
subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1];
subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
}
// validate list
#ifdef CEMU_DEBUG_ASSERT
sint32 count2 = 0;
subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge currentStartPosition;
currentStartPosition.SetRaw(RA_INTER_RANGE_START);
while (subrangeItr)
{
count2++;
if (subrangeItr->interval2.start < currentStartPosition)
assert_dbg();
currentStartPosition = subrangeItr->interval2.start;
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
if (count != count2)
assert_dbg();
#endif
}
std::unordered_map<IMLRegID, raLivenessRange*>& IMLRA_GetSubrangeMap(IMLSegment* imlSegment)
{
return imlSegment->raInfo.linkedList_perVirtualRegister;
}
raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
{
auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId);
if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end())
return nullptr;
return it->second;
}
struct raFixedRegRequirementWithVGPR
{
raInstructionEdge pos;
IMLPhysRegisterSet allowedReg;
IMLRegID regId;
};
std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment)
{
std::vector<raFixedRegRequirementWithVGPR> frrList;
size_t index = 0;
IMLUsedRegisters gprTracking;
while (index < imlSegment->imlList.size())
{
IMLFixedRegisters fixedRegs;
GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
raInstructionEdge pos;
pos.Set(index, true);
for(auto& fixedRegAccess : fixedRegs.listInput)
{
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID());
}
pos = pos + 1;
for(auto& fixedRegAccess : fixedRegs.listOutput)
{
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid()?fixedRegAccess.reg.GetRegID():IMLRegID_INVALID);
}
index++;
}
return frrList;
}
boost::container::small_vector<raLivenessRange*, 8> IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg)
{
boost::container::small_vector<raLivenessRange*, 8> rangeList;
for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
if(!currentRange->interval2.ContainsEdge(pos))
continue;
IMLPhysRegisterSet allowedRegs;
if(!currentRange->GetAllowedRegistersEx(allowedRegs))
continue;
if(allowedRegs.IsAvailable(physReg))
rangeList.emplace_back(currentRange);
}
return rangeList;
}
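// Fixed-register pre-pass, run once per segment before the main allocation
// loop. In order: (1) split any range with fixed requirements so it no longer
// crosses segment borders, (2) verify there are no unsatisfiable conflicts,
// (3) walk the per-instruction fixed-register list and split overlapping
// ranges whenever a physical register switches to a different virtual
// register, (4) assign the now-unambiguous physical register to each range.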
void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
// first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border
// todo - this can be optimized. Ranges only need to be split if there are conflicts with other segments. Note that the passes below rely on the fact that this pass currently splits all ranges with fixed register requirements
for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;)
{
IMLPhysRegisterSet allowedRegs;
if(!currentRange->GetAllowedRegistersEx(allowedRegs))
{
currentRange = currentRange->link_allSegmentRanges.next;
continue;
}
if(currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment())
{
raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next;
PPCRecRA_explodeRange(ppcImlGenContext, currentRange);
currentRange = nextRange;
continue;
}
currentRange = currentRange->link_allSegmentRanges.next;
}
// second pass - look for ranges with conflicting fixed register requirements and split these too (locally)
for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if(currentRange->list_fixedRegRequirements.empty())
continue; // we don't need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
if(!currentRange->GetAllowedRegistersEx(allowedRegs))
continue;
if(allowedRegs.HasAnyAvailable())
continue;
cemu_assert_unimplemented();
}
// third pass - assign fixed registers, split ranges if needed
std::vector<raFixedRegRequirementWithVGPR> frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
std::unordered_map<IMLPhysReg, IMLRegID> lastVGPR;
for(size_t i=0; i<frr.size(); i++)
{
raFixedRegRequirementWithVGPR& entry = frr[i];
// we currently only handle fixed register requirements with a single register
// with one exception: When regId is IMLRegID_INVALID then the entry acts as a list of reserved registers
cemu_assert_debug(entry.regId == IMLRegID_INVALID || entry.allowedReg.HasExactlyOneAvailable());
for(IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg+1))
{
// check if the assigned vGPR has changed
bool vgprHasChanged = false;
auto it = lastVGPR.find(physReg);
if(it != lastVGPR.end())
vgprHasChanged = it->second != entry.regId;
else
vgprHasChanged = true;
lastVGPR[physReg] = entry.regId;
if(!vgprHasChanged)
continue;
boost::container::small_vector<raLivenessRange*, 8> overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg);
if(entry.regId != IMLRegID_INVALID)
cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one overlapping range corresponding to the fixed register requirement; IMLRegID_INVALID entries (reserved registers) are the exception
for(auto& range : overlappingRanges)
{
if(range->interval2.start < entry.pos)
{
PPCRecRA_splitLocalSubrange2(ppcImlGenContext, range, entry.pos, true);
}
}
}
}
// finally iterate ranges and assign fixed registers
for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if(currentRange->list_fixedRegRequirements.empty())
continue; // we don't need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment
if(!currentRange->GetAllowedRegistersEx(allowedRegs))
{
cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
continue;
}
cemu_assert_debug(allowedRegs.HasExactlyOneAvailable());
currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg());
}
// DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned
#ifdef CEMU_DEBUG_ASSERT
for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if(!currentRange->HasPhysicalRegister())
continue;
for(raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next)
{
if(currentRange == currentRange2)
continue;
if(currentRange->interval2.IsOverlapping(currentRange2->interval2))
{
cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister());
}
}
}
for(raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
{
IMLPhysRegisterSet allowedRegs;
if(!currentRange->GetAllowedRegistersEx(allowedRegs))
{
cemu_assert_debug(currentRange->list_fixedRegRequirements.empty());
continue;
}
cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister()));
}
#endif
}
// we should not split ranges on instructions with tied registers (i.e. where a register encoded as a single parameter is both input and output)
// otherwise the RA algorithm has to assign both ranges the same physical register (not supported yet) and the point of splitting to fit another range is nullified
void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos)
{
// we ignore the instruction for now and just always make it a safe split position
cemu_assert_debug(pos.IsInstructionIndex());
if(pos.IsOnOutputEdge())
pos = pos - 1;
}
// convenience wrapper for IMLRA_MakeSafeSplitPosition
void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge startPos, sint32& distance)
{
cemu_assert_debug(startPos.IsInstructionIndex());
cemu_assert_debug(distance >= 0);
raInstructionEdge endPos = startPos + distance;
IMLRA_MakeSafeSplitPosition(imlSegment, endPos);
if(endPos < startPos)
{
distance = 0;
return;
}
distance = endPos.GetRaw() - startPos.GetRaw();
}
void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx);
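// When no physical register is free for a whole range, the allocator evaluates
// several competing spill strategies. Each strategy computes a cost estimate in
// Evaluate() (INT_MAX = not applicable); IMLRA_AssignSegmentRegisters picks the
// cheapest one and calls Apply(), which performs the actual range splitting or
// exploding. The allocation pass is then restarted.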
class RASpillStrategy
{
public:
virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0;
sint32 GetCost()
{
return strategyCost;
}
protected:
void ResetCost()
{
strategyCost = INT_MAX;
}
sint32 strategyCost;
};
class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy
{
public:
void Reset()
{
localRangeHoleCutting.distance = -1;
localRangeHoleCutting.largestHoleSubrange = nullptr;
ResetCost();
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
{
raInstructionEdge currentRangeStart = currentRange->interval2.start;
sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance();
cemu_assert_debug(localRangeHoleCutting.distance == -1);
cemu_assert_debug(strategyCost == INT_MAX);
if(!currentRangeStart.ConnectsToPreviousSegment())
{
cemu_assert_debug(currentRangeStart.GetRaw() >= 0);
for (auto candidate : timeline.activeRanges)
{
if (candidate->interval2.ExtendsIntoNextSegment())
continue;
// new checks (Oct 2024):
if(candidate == currentRange)
continue;
if(candidate->GetPhysicalRegister() < 0)
continue;
if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
continue;
sint32 distance2 = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart);
IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2);
if (distance2 < 2)
continue;
cemu_assert_debug(currentRangeStart.IsInstructionIndex());
distance2 = std::min<sint32>(distance2, imlSegment->imlList.size()*2 - currentRangeStart.GetRaw()); // limit distance to end of segment
// calculate split cost of candidate
sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2);
// calculate additional split cost of currentRange if hole is not large enough
if (distance2 < requiredSize2)
{
cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2);
// we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes)
cost += (requiredSize2 - distance2) / 10;
}
// compare cost with previous candidates
if (cost < strategyCost)
{
strategyCost = cost;
localRangeHoleCutting.distance = distance2;
localRangeHoleCutting.largestHoleSubrange = candidate;
}
}
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
cemu_assert_debug(strategyCost != INT_MAX);
sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance();
raInstructionEdge currentRangeStart = currentRange->interval2.start;
raInstructionEdge holeStartPosition = currentRangeStart;
raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance;
raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange;
if(collisionRange->interval2.start < holeStartPosition)
{
collisionRange = PPCRecRA_splitLocalSubrange2(nullptr, collisionRange, holeStartPosition, true);
cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point
cemu_assert_debug(!collisionRange || collisionRange->interval2.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough
}
else
{
cemu_assert_unimplemented(); // we still need to trim?
}
// we may also have to cut the current range to fit partially into the hole
if (requiredSize2 > localRangeHoleCutting.distance)
{
raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true);
if(tailRange)
{
cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
tailRange->UnsetPhysicalRegister();
}
}
// verify that the hole is large enough
if(collisionRange)
{
cemu_assert_debug(!collisionRange->interval2.IsOverlapping(currentRange->interval2));
}
}
private:
struct
{
sint32 distance;
raLivenessRange* largestHoleSubrange;
}localRangeHoleCutting;
};
class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy
{
// split current range (this is generally only a good choice when the current range is long but has few usages)
public:
void Reset()
{
ResetCost();
availableRegisterHole.distance = -1;
availableRegisterHole.physRegister = -1;
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs)
{
sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance();
raInstructionEdge currentRangeStart = currentRange->interval2.start;
cemu_assert_debug(strategyCost == INT_MAX);
availableRegisterHole.distance = -1;
availableRegisterHole.physRegister = -1;
if (currentRangeStart.GetRaw() >= 0)
{
if (localAvailableRegsMask.HasAnyAvailable())
{
sint32 physRegItr = -1;
while (true)
{
physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1);
if (physRegItr < 0)
break;
if(!allowedRegs.IsAvailable(physRegItr))
continue;
// get size of potential hole for this register
sint32 distance = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, currentRangeStart, physRegItr);
// some instructions may require the same register for another range, check the distance here
sint32 distUntilFixedReg = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, currentRangeStart, distance, currentRange->GetVirtualRegister(), physRegItr);
if(distUntilFixedReg < distance)
distance = distUntilFixedReg;
IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
if (distance < 2)
continue;
// calculate additional cost due to split
cemu_assert_debug(distance < requiredSize2); // should always be true otherwise previous step would have selected this register?
sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
// add small additional cost for the remaining range (prefer larger holes)
cost += ((requiredSize2 - distance) / 2) / 10;
if (cost < strategyCost)
{
strategyCost = cost;
availableRegisterHole.distance = distance;
availableRegisterHole.physRegister = physRegItr;
}
}
}
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
cemu_assert_debug(strategyCost != INT_MAX);
raInstructionEdge currentRangeStart = currentRange->interval2.start;
// use available register
raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true);
if(tailRange)
{
cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
tailRange->UnsetPhysicalRegister();
}
}
private:
struct
{
sint32 physRegister;
sint32 distance; // size of hole
}availableRegisterHole;
};
class RASpillStrategy_ExplodeRange : public RASpillStrategy
{
public:
void Reset()
{
ResetCost();
explodeRange.range = nullptr;
explodeRange.distance = -1;
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
{
raInstructionEdge currentRangeStart = currentRange->interval2.start;
if(currentRangeStart.ConnectsToPreviousSegment())
currentRangeStart.Set(0, true);
sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance();
cemu_assert_debug(strategyCost == INT_MAX);
explodeRange.range = nullptr;
explodeRange.distance = -1;
for (auto candidate : timeline.activeRanges)
{
if (!candidate->interval2.ExtendsIntoNextSegment())
continue;
// new checks (Oct 2024):
if(candidate == currentRange)
continue;
if(candidate->GetPhysicalRegister() < 0)
continue;
if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
continue;
sint32 distance = PPCRecRA_countDistanceUntilNextUse2(candidate, currentRangeStart);
IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
if( distance < 2)
continue;
sint32 cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate);
// if the hole is not large enough, add cost of splitting current subrange
if (distance < requiredSize2)
{
cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
// add small additional cost for the remaining range (prefer larger holes)
cost += ((requiredSize2 - distance) / 2) / 10;
}
// compare with current best candidate for this strategy
if (cost < strategyCost)
{
strategyCost = cost;
explodeRange.distance = distance;
explodeRange.range = candidate;
}
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
raInstructionEdge currentRangeStart = currentRange->interval2.start;
if(currentRangeStart.ConnectsToPreviousSegment())
currentRangeStart.Set(0, true);
sint32 requiredSize2 = currentRange->interval2.GetPreciseDistance();
// explode range
PPCRecRA_explodeRange(nullptr, explodeRange.range);
// split current subrange if necessary
if( requiredSize2 > explodeRange.distance)
{
raLivenessRange* tailRange = PPCRecRA_splitLocalSubrange2(nullptr, currentRange, currentRangeStart+explodeRange.distance, true);
if(tailRange)
{
cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers
tailRange->UnsetPhysicalRegister();
}
}
}
private:
struct
{
raLivenessRange* range;
sint32 distance; // size of hole
// note: if we explode a range we still have to check the size of the hole that becomes available; if it is too small, we need to add the cost of splitting the local subrange
}explodeRange;
};
class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
{
public:
void Reset()
{
ResetCost();
explodeRange.range = nullptr;
explodeRange.distance = -1;
}
void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs)
{
// explode the range with the least cost
cemu_assert_debug(strategyCost == INT_MAX);
cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1);
for(auto candidate : timeline.activeRanges)
{
if (!candidate->interval2.ExtendsIntoNextSegment())
continue;
// only select candidates that clash with current subrange
if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange)
continue;
// and also filter any that don't meet fixed register requirements
if(!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
continue;
sint32 cost;
cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate);
// compare with current best candidate for this strategy
if (cost < strategyCost)
{
strategyCost = cost;
explodeRange.distance = INT_MAX;
explodeRange.range = candidate;
}
}
// add current range as a candidate too
sint32 ownCost;
ownCost = PPCRecRARange_estimateCostAfterRangeExplode(currentRange);
if (ownCost < strategyCost)
{
strategyCost = ownCost;
explodeRange.distance = INT_MAX;
explodeRange.range = currentRange;
}
}
void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override
{
cemu_assert_debug(strategyCost != INT_MAX);
PPCRecRA_explodeRange(ctx, explodeRange.range);
}
private:
struct
{
raLivenessRange* range;
sint32 distance; // size of hole
// note: if we explode a range we still have to check the size of the hole that becomes available; if it is too small, we need to add the cost of splitting the local subrange
}explodeRange;
};
// filter any registers from candidatePhysRegSet which cannot be used by currentRange due to fixed register requirements within the range that it occupies
void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
{
IMLSegment* seg = currentRange->imlSegment;
if(seg->imlList.empty())
return; // there can be no fixed register requirements if there are no instructions
raInstructionEdge firstPos = currentRange->interval2.start;
if(currentRange->interval2.start.ConnectsToPreviousSegment())
firstPos.SetRaw(0);
else if(currentRange->interval2.start.ConnectsToNextSegment())
firstPos.Set(seg->imlList.size()-1, false);
raInstructionEdge lastPos = currentRange->interval2.end;
if(currentRange->interval2.end.ConnectsToPreviousSegment())
lastPos.SetRaw(0);
else if(currentRange->interval2.end.ConnectsToNextSegment())
lastPos.Set(seg->imlList.size()-1, false);
cemu_assert_debug(firstPos <= lastPos);
IMLRegID ourRegId = currentRange->GetVirtualRegister();
IMLFixedRegisters fixedRegs;
if(firstPos.IsOnOutputEdge())
GetInstructionFixedRegisters(seg->imlList.data()+firstPos.GetInstructionIndex(), fixedRegs);
for(raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos)
{
if(currentPos.IsOnInputEdge())
{
GetInstructionFixedRegisters(seg->imlList.data()+currentPos.GetInstructionIndex(), fixedRegs);
}
auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
for(auto& fixedRegLoc : fixedRegAccess)
{
if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet);
}
}
}
// filter out any registers along the range cluster
void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet)
{
cemu_assert_debug(currentRange->imlSegment == imlSegment);
if(currentRange->interval2.ExtendsPreviousSegment() || currentRange->interval2.ExtendsIntoNextSegment())
{
auto clusterRanges = currentRange->GetAllSubrangesInCluster();
for(auto& rangeIt : clusterRanges)
{
IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, rangeIt, candidatePhysRegSet);
if(!candidatePhysRegSet.HasAnyAvailable())
break;
}
return;
}
IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet);
}
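// Core per-segment allocation sweep. Ranges are processed in ascending start
// order; already-fixed ranges are simply added to the timeline. For the rest we
// build a candidate set from the type-specific register pool, subtract the
// registers of currently active ranges, of overlapping ranges in other segments
// of the same cluster, and of reserved fixed requirements, then take the first
// remaining register. If the set ends up empty, the cheapest spill strategy is
// applied and the function returns false so the caller restarts the whole
// assignment (the range layout may have changed globally).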
bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
// sort subranges ascending by start index
_sortSegmentAllSubrangesLinkedList(imlSegment);
IMLRALivenessTimeline livenessTimeline;
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge lastInstructionEdge;
lastInstructionEdge.SetRaw(RA_INTER_RANGE_END);
struct
{
RASpillStrategy_LocalRangeHoleCutting localRangeHoleCutting;
RASpillStrategy_AvailableRegisterHole availableRegisterHole;
RASpillStrategy_ExplodeRange explodeRange;
// for ranges that connect to follow up segments:
RASpillStrategy_ExplodeRangeInter explodeRangeInter;
}strategy;
while(subrangeItr)
{
raInstructionEdge currentRangeStart = subrangeItr->interval2.start; // used to be currentIndex before refactor
PPCRecRA_debugValidateSubrange(subrangeItr);
// below used to be: std::min<sint32>(currentIndex, RA_INTER_RANGE_END-1)
livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges
// note: The logic here is complicated with regard to whether the instruction index should be inclusive or exclusive. Find a way to simplify?
// if subrange already has register assigned then add it to the active list and continue
if (subrangeItr->GetPhysicalRegister() >= 0)
{
// verify if register is actually available
#ifdef CEMU_DEBUG_ASSERT
for (auto& liverangeItr : livenessTimeline.activeRanges)
{
// check for register mismatch
cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister());
}
#endif
livenessTimeline.AddActiveRange(subrangeItr);
subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue;
}
// ranges with fixed register requirements should already have a phys register assigned
if(!subrangeItr->list_fixedRegRequirements.empty())
{
cemu_assert_debug(subrangeItr->HasPhysicalRegister());
}
// find free register for current subrangeItr and segment
IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister());
IMLPhysRegisterSet candidatePhysRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat);
cemu_assert_debug(candidatePhysRegSet.HasAnyAvailable()); // no valid pool provided for this register type
IMLPhysRegisterSet allowedRegs = subrangeItr->GetAllowedRegisters(candidatePhysRegSet);
cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this in the initial pass to keep the code here simpler)
candidatePhysRegSet &= allowedRegs;
for (auto& liverangeItr : livenessTimeline.activeRanges)
{
cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0);
candidatePhysRegSet.SetReserved(liverangeItr->GetPhysicalRegister());
}
// check intersections with other ranges and determine allowed registers
IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
if(candidatePhysRegSet.HasAnyAvailable())
{
// check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments)
PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet);
}
// some target instructions may enforce specific registers (e.g. common on X86 where something like SHL <reg>, CL forces CL as the count register)
// we determine the list of allowed registers here
// this really only works if we assume single-register requirements (otherwise it's better not to filter out early and instead allow register corrections later, but we don't support this yet)
if (candidatePhysRegSet.HasAnyAvailable())
{
IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet);
}
if(candidatePhysRegSet.HasAnyAvailable())
{
// use free register
subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg());
livenessTimeline.AddActiveRange(subrangeItr);
subrangeItr = subrangeItr->link_allSegmentRanges.next; // next
continue;
}
// there is no free register for the entire range
// evaluate different strategies of splitting ranges to free up another register or shorten the current range
strategy.localRangeHoleCutting.Reset();
strategy.availableRegisterHole.Reset();
strategy.explodeRange.Reset();
// can't assign a register
// there might be registers available, we just can't use them due to range conflicts
RASpillStrategy* selectedStrategy = nullptr;
auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy)
{
if(newStrategy.GetCost() == INT_MAX)
return;
if(selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost())
selectedStrategy = &newStrategy;
};
if (!subrangeItr->interval2.ExtendsIntoNextSegment())
{
// range ends in current segment, use local strategies
// evaluate strategy: Cut hole into local subrange
strategy.localRangeHoleCutting.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
SelectStrategyIfBetter(strategy.localRangeHoleCutting);
// evaluate strategy: Split current range to fit in available holes
// todo - are checks required to avoid splitting on the suffix instruction?
strategy.availableRegisterHole.Evaluate(imlSegment, subrangeItr, livenessTimeline, localAvailableRegsMask, allowedRegs);
SelectStrategyIfBetter(strategy.availableRegisterHole);
// evaluate strategy: Explode inter-segment ranges
strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
SelectStrategyIfBetter(strategy.explodeRange);
}
else // if subrangeItr->interval2.ExtendsIntoNextSegment()
{
strategy.explodeRangeInter.Reset();
strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs);
SelectStrategyIfBetter(strategy.explodeRangeInter);
}
// choose strategy
if(selectedStrategy)
{
selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr);
}
else
{
// none of the evaluated strategies can be applied, this should only happen if the range extends in from the previous segment(s) and we have no good strategy for it
cemu_assert_debug(subrangeItr->interval2.ExtendsPreviousSegment());
// alternative strategy if we have no other choice: explode current range
PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr);
}
return false;
}
return true;
}
void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext)
{
// start with frequently executed segments first
sint32 maxLoopDepth = 0;
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth);
}
// assign fixed registers first
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt);
while (true)
{
bool done = false;
for (sint32 d = maxLoopDepth; d >= 0; d--)
{
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
{
if (segIt->loopDepth != d)
continue;
done = IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt);
if (done == false)
break;
}
if (done == false)
break;
}
if (done)
break;
}
}
void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
{
// insert empty segments after every non-taken branch if the linked segment has more than one input
// this gives the register allocator more room to create efficient spill code
size_t segmentIndex = 0;
while (segmentIndex < ppcImlGenContext->segmentList2.size())
{
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
if (imlSegment->nextSegmentIsUncertain)
{
segmentIndex++;
continue;
}
if (imlSegment->nextSegmentBranchTaken == nullptr || imlSegment->nextSegmentBranchNotTaken == nullptr)
{
segmentIndex++;
continue;
}
if (imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() <= 1)
{
segmentIndex++;
continue;
}
if (imlSegment->nextSegmentBranchNotTaken->isEnterable)
{
segmentIndex++;
continue;
}
PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1);
IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0];
IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1];
IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken;
IMLSegment_RemoveLink(imlSegmentP0, nextSegment);
IMLSegment_SetLinkBranchNotTaken(imlSegmentP1, nextSegment);
IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
segmentIndex++;
}
// detect loops
for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
imlSegment->momentaryIndex = s;
}
for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s];
PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment);
}
}
IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
auto it = segMap.find(regId);
return it != segMap.end() ? &it->second : nullptr;
}
// scan instructions and establish register usage range for segment
void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
size_t instructionIndex = 0;
IMLUsedRegisters gprTracking;
auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
while (instructionIndex < imlSegment->imlList.size())
{
imlSegment->imlList[instructionIndex].CheckRegisterUsage(&gprTracking);
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
IMLRegID gprId = gprReg.GetRegID();
auto it = segDistMap.find(gprId);
if (it == segDistMap.end())
{
segDistMap.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1);
ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat());
}
else
{
it->second.TrackInstruction(instructionIndex);
#ifdef CEMU_DEBUG_ASSERT
cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same
#endif
}
});
instructionIndex++;
}
}
void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
{
// for each register calculate min/max index of usage range within each segment
size_t dbgIndex = 0;
for (IMLSegment* segIt : ctx.deprGenContext->segmentList2)
{
cemu_assert_debug(segIt->momentaryIndex == dbgIndex);
IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segIt);
dbgIndex++;
}
}
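// Recursively materializes the abstract [usageStart, usageEnd) window of vGPR
// in this segment as a raLivenessRange, then follows control flow: if the
// window reaches the segment end, it connects to matching ranges in both branch
// successors (subrangeBranchTaken/NotTaken); if it starts at the segment
// beginning, it pulls in predecessors whose window reaches their end. The
// result is a cluster of linked per-segment ranges for one virtual register.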
raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name)
{
IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR);
if (!abstractRange)
return nullptr;
if (abstractRange->isProcessed)
{
// return already existing segment
raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR);
cemu_assert_debug(existingRange);
return existingRange;
}
abstractRange->isProcessed = true;
// create subrange
#ifdef CEMU_DEBUG_ASSERT
cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr);
#endif
cemu_assert_debug(
(abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) ||
abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger
sint32 inclusiveEnd = abstractRange->usageEnd;
if(inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END)
inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive
raInterval interval;
interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true);
raLivenessRange* subrange = PPCRecRA_createSubrange2(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end);
// traverse forward
if (abstractRange->usageEnd == RA_INTER_RANGE_END)
{
if (imlSegment->nextSegmentBranchTaken)
{
IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR);
if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START)
{
subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name);
subrange->subrangeBranchTaken->previousRanges.push_back(subrange);
cemu_assert_debug(subrange->subrangeBranchTaken->interval2.ExtendsPreviousSegment());
}
}
if (imlSegment->nextSegmentBranchNotTaken)
{
IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR);
if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START)
{
subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name);
subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange);
cemu_assert_debug(subrange->subrangeBranchNotTaken->interval2.ExtendsPreviousSegment());
}
}
}
// traverse backward
if (abstractRange->usageStart == RA_INTER_RANGE_START)
{
for (auto& it : imlSegment->list_prevSegments)
{
IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR);
if(!prevRange)
continue;
if (prevRange->usageEnd == RA_INTER_RANGE_END)
PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name);
}
}
// for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction
// this is due to range load instructions being inserted before the suffix instruction
// todo - currently later steps might break this assumption, look into this
// if (subrange->interval2.ExtendsIntoNextSegment())
// {
// if (imlSegment->HasSuffixInstruction())
// {
// cemu_assert_debug(subrange->interval2.start.GetInstructionIndex() <= imlSegment->GetSuffixInstructionIndex());
// }
// }
return subrange;
}
// take abstract range data and create LivenessRanges
void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
const std::unordered_map<IMLRegID, raLivenessRange*>& regToSubrange = IMLRA_GetSubrangeMap(imlSegment);
auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet)
{
raLivenessRange* subrange = regToSubrange.find(regId)->second;
cemu_assert_debug(subrange);
raFixedRegRequirement tmp;
tmp.pos.Set(instructionIndex, isInput);
tmp.allowedReg = physRegSet;
if(subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos)
subrange->list_fixedRegRequirements.push_back(tmp);
};
// convert abstract min-max ranges to liveness range objects
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
for (auto& it : segMap)
{
if(it.second.isProcessed)
continue;
IMLRegID regId = it.first;
PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second);
}
// fill created ranges with read/write location indices
// note that at this point there is only one range per register per segment
// and the algorithm below relies on this
size_t index = 0;
IMLUsedRegisters gprTracking;
while (index < imlSegment->imlList.size())
{
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
IMLRegID gprId = gprReg.GetRegID();
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten);
cemu_assert_debug(!subrange->interval2.start.IsInstructionIndex() || subrange->interval2.start.GetInstructionIndex() <= index);
cemu_assert_debug(!subrange->interval2.end.IsInstructionIndex() || subrange->interval2.end.GetInstructionIndex() >= index);
});
// check fixed register requirements
IMLFixedRegisters fixedRegs;
GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
for(auto& fixedRegAccess : fixedRegs.listInput)
{
if(fixedRegAccess.reg != IMLREG_INVALID)
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet);
}
for(auto& fixedRegAccess : fixedRegs.listOutput)
{
if(fixedRegAccess.reg != IMLREG_INVALID)
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet);
}
index++;
}
}
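// The two helpers below stretch an abstract range to the segment boundary,
// creating a placeholder entry (INVALID_FORMAT) if the register had no uses in
// that segment. Extending to the beginning also recursively extends all
// predecessors to their end, so a register stays live along the entire path
// between the two uses that IMLRA_connectAbstractRanges links together.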
void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
auto it = segDistMap.find(regId);
if (it == segDistMap.end())
{
sint32 startIndex;
if(imlSegment->HasSuffixInstruction())
startIndex = imlSegment->GetSuffixInstructionIndex();
else
startIndex = RA_INTER_RANGE_END;
segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, startIndex, RA_INTER_RANGE_END);
}
else
{
it->second.usageEnd = RA_INTER_RANGE_END;
}
}
void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId)
{
auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
auto it = segDistMap.find(regId);
if (it == segDistMap.end())
{
segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START);
}
else
{
it->second.usageStart = RA_INTER_RANGE_START;
}
// propagate backwards
for (auto& it : imlSegment->list_prevSegments)
{
IMLRA_extendAbstractRangeToEndOfSegment(ctx, it, regId);
}
}
void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth)
{
#ifdef CEMU_DEBUG_ASSERT
if (routeDepth < 2)
assert_dbg();
#endif
// extend starting range to end of segment
IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId);
// extend all the connecting segments in both directions
for (sint32 i = 1; i < (routeDepth - 1); i++)
{
IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[i], regId);
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[i], regId);
}
// extend the final segment towards the beginning
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[routeDepth - 1], regId);
}
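// Route search used by the merge step: starting from a segment where the range
// is live-out, walk successor segments (up to 64 route entries) while a
// remaining instruction budget lasts (45 minus the distance to the end of the
// starting segment, see PPCRecRA_checkAndTryExtendRange). If another window for
// the same register is found within the budget, the whole route is connected
// via IMLRA_connectAbstractRanges.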
void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth)
{
if (routeDepth >= 64)
{
cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded\n");
return;
}
route[routeDepth] = currentSegment;
IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID);
if (!range)
{
// measure distance over entire segment
distanceLeft -= (sint32)currentSegment->imlList.size();
if (distanceLeft > 0)
{
if (currentSegment->nextSegmentBranchNotTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1);
if (currentSegment->nextSegmentBranchTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1);
}
return;
}
else
{
// measure distance to range
if (range->usageStart == RA_INTER_RANGE_END)
{
if (distanceLeft < (sint32)currentSegment->imlList.size())
return; // range too far away
}
else if (range->usageStart != RA_INTER_RANGE_START && range->usageStart > distanceLeft)
return; // out of range
// found close range -> connect ranges
IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1);
}
}
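// if the range ends close to the segment exit, try to find a nearby follow-up range in the successor segments and merge the two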
void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID)
{
cemu_assert_debug(range->usageEnd >= 0);
// count instructions to end of initial segment
sint32 instructionsUntilEndOfSeg;
if (range->usageEnd == RA_INTER_RANGE_END)
instructionsUntilEndOfSeg = 0;
else
instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - range->usageEnd;
cemu_assert_debug(instructionsUntilEndOfSeg >= 0);
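	// heuristic scan distance; ranges further apart than this are considered not worth merging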
sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
if (remainingScanDist <= 0)
return; // can't reach end
IMLSegment* route[64];
route[0] = currentSegment;
if (currentSegment->nextSegmentBranchNotTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1);
if (currentSegment->nextSegmentBranchTaken)
_IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1);
}
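// try to merge every abstract range of this segment with close-by ranges in successor segments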
void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
for (auto& it : segMap)
{
PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &(it.second), it.first);
}
#ifdef CEMU_DEBUG_ASSERT
if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
assert_dbg();
if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
assert_dbg();
#endif
}
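// breadth-first walk over all segments reachable from imlSegment, running close-range merging on each one exactly once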
void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
std::vector<IMLSegment*> list_segments;
std::vector<bool> list_processedSegment;
size_t segmentCount = ctx.deprGenContext->segmentList2.size();
list_segments.reserve(segmentCount+1);
list_processedSegment.resize(segmentCount);
auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {list_processedSegment[seg->momentaryIndex] = true; };
auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool { return list_processedSegment[seg->momentaryIndex]; };
markSegProcessed(imlSegment);
sint32 index = 0;
list_segments.push_back(imlSegment);
while (index < list_segments.size())
{
IMLSegment* currentSegment = list_segments[index];
PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment);
// follow flow
if (currentSegment->nextSegmentBranchNotTaken && !isSegProcessed(currentSegment->nextSegmentBranchNotTaken))
{
markSegProcessed(currentSegment->nextSegmentBranchNotTaken);
list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
}
if (currentSegment->nextSegmentBranchTaken && !isSegProcessed(currentSegment->nextSegmentBranchTaken))
{
markSegProcessed(currentSegment->nextSegmentBranchTaken);
list_segments.push_back(currentSegment->nextSegmentBranchTaken);
}
index++;
}
}
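// run close-range merging over the whole control flow graph, starting from each entry/standalone segment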
void IMLRA_MergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx)
{
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
if (!imlSegment->list_prevSegments.empty())
continue; // not an entry/standalone segment
PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment);
}
}
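// for segments that can exit a loop, extend ranges which stay live until the segment end into the exit successors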
void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx)
{
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
auto localLoopDepth = imlSegment->loopDepth;
if (localLoopDepth <= 0)
continue; // not inside a loop
// look for loop exit
bool hasLoopExit = false;
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth)
{
hasLoopExit = true;
}
if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth)
{
hasLoopExit = true;
}
if (hasLoopExit == false)
continue;
// extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment);
for (auto& it : segMap)
{
if(it.second.usageEnd != RA_INTER_RANGE_END)
continue;
if (imlSegment->nextSegmentBranchTaken)
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, it.first);
if (imlSegment->nextSegmentBranchNotTaken)
IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchNotTaken, it.first);
}
}
}
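// second pass: refine the abstract ranges across segment boundaries, then convert them into concrete liveness ranges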
void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
{
IMLRA_MergeCloseAbstractRanges(ctx);
// extra pass to move register loads and stores out of loops
IMLRA_ExtendAbstractRangesOutOfLoops(ctx);
// calculate liveness ranges
for (auto& segIt : ctx.deprGenContext->segmentList2)
IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt);
}
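// derive load/store requirements for a range: the initial load can be skipped if the register is fully overwritten before being read (or inherited from the previous segment), and a store is requested if the register is written at all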
void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
{
bool isRead = false;
bool isWritten = false;
bool isOverwritten = false;
for (auto& location : subrange->list_locations)
{
if (location.isRead)
{
isRead = true;
}
if (location.isWrite)
{
if (isRead == false)
isOverwritten = true;
isWritten = true;
}
}
subrange->_noLoad = isOverwritten;
subrange->hasStore = isWritten;
if (subrange->interval2.ExtendsPreviousSegment())
subrange->_noLoad = true;
}
struct subrangeEndingInfo_t
{
raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE];
sint32 subrangeCount;
bool hasUndefinedEndings;
};
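// recursively collect all subranges in which this range's value stops being carried forward; sets hasUndefinedEndings when the traversal is cut short or a successor link is missing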
void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
{
if (depth >= 30)
{
info->hasUndefinedEndings = true;
return;
}
if (subrange->lastIterationIndex == iterationIndex)
return; // already processed
subrange->lastIterationIndex = iterationIndex;
if (subrange->hasStoreDelayed)
return; // no need to traverse this subrange
IMLSegment* imlSegment = subrange->imlSegment;
if (!subrange->interval2.ExtendsIntoNextSegment())
{
// ending segment
if (info->subrangeCount >= SUBRANGE_LIST_SIZE)
{
info->hasUndefinedEndings = true;
return;
}
else
{
info->subrangeList[info->subrangeCount] = subrange;
info->subrangeCount++;
}
return;
}
// traverse next subranges in flow
if (imlSegment->nextSegmentBranchNotTaken)
{
if (subrange->subrangeBranchNotTaken == nullptr)
{
info->hasUndefinedEndings = true;
}
else
{
_findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info);
}
}
if (imlSegment->nextSegmentBranchTaken)
{
if (subrange->subrangeBranchTaken == nullptr)
{
info->hasUndefinedEndings = true;
}
else
{
_findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info);
}
}
}
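// try to delay this range's store into its ending subranges when doing so is no more expensive than storing here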
static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange)
{
if (!subrange->interval2.ExtendsIntoNextSegment())
return;
// analyze data flow across segments (if this segment has writes)
if (subrange->hasStore)
{
subrangeEndingInfo_t writeEndingInfo;
writeEndingInfo.subrangeCount = 0;
writeEndingInfo.hasUndefinedEndings = false;
_findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo);
if (writeEndingInfo.hasUndefinedEndings == false)
{
// get cost of delaying store into endings
sint32 delayStoreCost = 0;
bool alreadyStoredInAllEndings = true;
for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
{
raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
if( subrangeItr->hasStore )
continue; // this ending already stores, no extra cost
alreadyStoredInAllEndings = false;
sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment);
delayStoreCost = std::max(storeCost, delayStoreCost);
}
if (alreadyStoredInAllEndings)
{
subrange->hasStore = false;
subrange->hasStoreDelayed = true;
}
else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment))
{
subrange->hasStore = false;
subrange->hasStoreDelayed = true;
for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
{
raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i];
subrangeItr->hasStore = true;
}
}
}
}
}
void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext)
{
// this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore
// track read/write dependencies per segment
for(auto& seg : ppcImlGenContext->segmentList2)
{
raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges;
while(subrange)
{
IMLRA_AnalyzeSubrangeDataDependency(subrange);
subrange = subrange->link_allSegmentRanges.next;
}
}
// propagate information across segment boundaries
for(auto& seg : ppcImlGenContext->segmentList2)
{
raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges;
while(subrange)
{
IMLRA_AnalyzeRangeDataFlow(subrange);
subrange = subrange->link_allSegmentRanges.next;
}
}
}
/* Generate move instructions */
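// build an IMLReg that refers directly to an allocated physical register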
inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId)
{
return IMLReg(baseFormat, baseFormat, 0, regId);
}
#define DEBUG_RA_INSTRUCTION_GEN 0 // set to 1 to dump segments before and after move instruction generation
// prepass for IMLRA_GenerateSegmentMoveInstructions2 which rewrites all virtual registers to their assigned physical counterparts
void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
std::unordered_map<IMLRegID, IMLRegID> virtId2PhysReg;
boost::container::small_vector<raLivenessRange*, 64> activeRanges;
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge currentEdge;
for(size_t i=0; i<imlSegment->imlList.size(); i++)
{
currentEdge.Set(i, false); // set to instruction index on output edge
// activate ranges which begin before or during this instruction
while(currentRange && currentRange->interval2.start <= currentEdge)
{
cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict
virtId2PhysReg[currentRange->GetVirtualRegister()] = currentRange->GetPhysicalRegister();
activeRanges.push_back(currentRange);
currentRange = currentRange->link_allSegmentRanges.next;
}
// rewrite registers
imlSegment->imlList[i].RewriteGPR(virtId2PhysReg);
// deactivate ranges which end during this instruction
auto it = activeRanges.begin();
while(it != activeRanges.end())
{
if((*it)->interval2.end <= currentEdge)
{
virtId2PhysReg.erase((*it)->GetVirtualRegister());
it = activeRanges.erase(it);
}
else
++it;
}
}
}
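// rebuild the segment's instruction list with load (r_name) and store (name_r) moves inserted at the boundaries of each liveness range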
void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{
IMLRA_RewriteRegisters(ctx, imlSegment);
#if DEBUG_RA_INSTRUCTION_GEN
cemuLog_log(LogType::Force, "");
cemuLog_log(LogType::Force, "[Seg before RA]");
IMLDebug_DumpSegment(nullptr, imlSegment, true);
#endif
bool hadSuffixInstruction = imlSegment->HasSuffixInstruction();
std::vector<IMLInstruction> rebuiltInstructions;
sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 1 : 0);
if(imlSegment->imlList.empty())
{
// empty segments need special handling (todo - look into merging this with the core logic below eventually)
// store all ranges
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
while(currentRange)
{
if(currentRange->hasStore)
rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()));
currentRange = currentRange->link_allSegmentRanges.next;
}
// load ranges
currentRange = imlSegment->raInfo.linkedList_allSubranges;
while(currentRange)
{
if(!currentRange->_noLoad)
{
cemu_assert_debug(currentRange->interval2.ExtendsIntoNextSegment());
rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
}
currentRange = currentRange->link_allSegmentRanges.next;
}
imlSegment->imlList = std::move(rebuiltInstructions);
return;
}
	// make sure that no range exceeds the suffix instruction input edge unless it needs to be loaded for the next segment (todo - for those, set the start point accordingly?)
{
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
raInstructionEdge edge;
if(imlSegment->HasSuffixInstruction())
edge.Set(numInstructionsWithoutSuffix, true);
else
edge.Set(numInstructionsWithoutSuffix-1, false);
while(currentRange)
{
if(!currentRange->interval2.IsNextSegmentOnly() && currentRange->interval2.end > edge)
{
currentRange->interval2.SetEnd(edge);
}
currentRange = currentRange->link_allSegmentRanges.next;
}
}
#if DEBUG_RA_INSTRUCTION_GEN
cemuLog_log(LogType::Force, "");
cemuLog_log(LogType::Force, "--- Intermediate liveness info ---");
{
raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges;
while(dbgRange)
{
cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval2.start.GetDebugString(), dbgRange->interval2.end.GetDebugString());
dbgRange = dbgRange->link_allSegmentRanges.next;
}
}
#endif
boost::container::small_vector<raLivenessRange*, 64> activeRanges;
	// first we add all the ranges that extend from the previous segment; some of these end immediately at the first instruction, so we may need to store them early
raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges;
// make all ranges active that start on RA_INTER_RANGE_START
while(currentRange && currentRange->interval2.start.ConnectsToPreviousSegment())
{
activeRanges.push_back(currentRange);
currentRange = currentRange->link_allSegmentRanges.next;
}
// store all ranges that end before the first output edge (includes RA_INTER_RANGE_START)
auto it = activeRanges.begin();
raInstructionEdge firstOutputEdge;
firstOutputEdge.Set(0, false);
while(it != activeRanges.end())
{
if( (*it)->interval2.end < firstOutputEdge)
{
raLivenessRange* storedRange = *it;
if(storedRange->hasStore)
rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
it = activeRanges.erase(it);
continue;
}
++it;
}
sint32 numInstructions = (sint32)imlSegment->imlList.size();
for(sint32 i=0; i<numInstructions; i++)
{
raInstructionEdge curEdge;
// input edge
curEdge.SetRaw(i*2+1); // +1 to include ranges that start at the output of the instruction
while(currentRange && currentRange->interval2.start <= curEdge)
{
if(!currentRange->_noLoad)
{
rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
}
activeRanges.push_back(currentRange);
currentRange = currentRange->link_allSegmentRanges.next;
}
// copy instruction
rebuiltInstructions.push_back(imlSegment->imlList[i]);
// output edge
curEdge.SetRaw(i*2+1+1);
		// also store ranges that end on the next input edge; this is handled by the extra +1 above
auto it = activeRanges.begin();
while(it != activeRanges.end())
{
if( (*it)->interval2.end <= curEdge)
{
				// range expires
				// due to the +1 offset above the store may be emitted before the range's final read, which is safe since the value no longer changes at this point
				// todo - check hasStore
raLivenessRange* storedRange = *it;
if(storedRange->hasStore)
{
cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix
rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister()));
}
it = activeRanges.erase(it);
continue;
}
++it;
}
}
// if there is no suffix instruction we currently need to handle the final loads here
cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
if(imlSegment->HasSuffixInstruction())
{
		cemu_assert_debug(!currentRange); // all ranges should have been activated at this point
for(auto& remainingRange : activeRanges)
{
cemu_assert_debug(!remainingRange->hasStore);
}
}
else
{
for(auto& remainingRange : activeRanges)
{
			cemu_assert_debug(!remainingRange->hasStore); // no remaining range may still require a store at this point
}
while(currentRange)
{
cemu_assert_debug(currentRange->interval2.IsNextSegmentOnly());
cemu_assert_debug(!currentRange->_noLoad);
rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName());
currentRange = currentRange->link_allSegmentRanges.next;
}
}
imlSegment->imlList = std::move(rebuiltInstructions);
cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
#if DEBUG_RA_INSTRUCTION_GEN
cemuLog_log(LogType::Force, "");
cemuLog_log(LogType::Force, "[Seg after RA]");
IMLDebug_DumpSegment(nullptr, imlSegment, false);
#endif
}
void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx)
{
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
IMLRA_GenerateSegmentMoveInstructions2(ctx, imlSegment);
}
}
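// debug pass: validate every liveness range in every segment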
void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
{
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
{
IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr)
{
PPCRecRA_debugValidateSubrange(subrangeItr);
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
}
}
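// top-level entry point: compute liveness, assign physical registers and insert the required load/store moves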
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
{
IMLRegisterAllocatorContext ctx;
ctx.raParam = &raParam;
ctx.deprGenContext = ppcImlGenContext;
IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext);
ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment
ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size());
IMLRA_CalculateLivenessRanges(ctx);
IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx);
IMLRA_AssignRegisters(ctx, ppcImlGenContext);
DbgVerifyAllRanges(ctx); // DEBUG
IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext);
IMLRA_GenerateMoveInstructions(ctx);
IMLRA_DeleteAllRanges(ppcImlGenContext);
}