From 675c802cc160bbe954b0189e96830578ebfb2eb3 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sun, 1 Sep 2024 02:52:45 +0200 Subject: [PATCH] PPCRec: Simplify RA code and clean it up a bit --- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 27 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 413 +++++++++--------- .../IML/IMLRegisterAllocatorRanges.cpp | 288 +++++++----- .../IML/IMLRegisterAllocatorRanges.h | 77 +++- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 55 +-- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 15 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 5 - 7 files changed, 466 insertions(+), 414 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 192f06a1..cca8b61e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -94,23 +94,12 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml debug_printf(" "); index++; } - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { if (offset == subrangeItr->start.index) { - if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index) - { - debug_printf("*%-2d", subrangeItr->range->virtualRegister); - } - else - { - debug_printf("|%-2d", subrangeItr->range->virtualRegister); - } - } - else if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index ) - { - debug_printf("* "); + debug_printf("|%-2d", subrangeItr->GetVirtualRegister()); } else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index) { @@ -122,7 +111,7 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml } index += 3; // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } 
} @@ -501,19 +490,19 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool if (printLivenessRangeInfo) { debug_printf("Ranges-VirtReg "); - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - debug_printf("v%-2d", subrangeItr->range->virtualRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + debug_printf("v%-2d", subrangeItr->GetVirtualRegister()); + subrangeItr = subrangeItr->link_allSegmentRanges.next; } debug_printf("\n"); debug_printf("Ranges-PhysReg "); subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { - debug_printf("p%-2d", subrangeItr->range->physicalRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + debug_printf("p%-2d", subrangeItr->GetPhysicalRegister()); + subrangeItr = subrangeItr->link_allSegmentRanges.next; } debug_printf("\n"); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 96f8d9f0..a59b88bd 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -50,10 +50,9 @@ struct IMLRegisterAllocatorContext }; -uint32 recRACurrentIterationIndex = 0; - uint32 PPCRecRA_getNextIterationIndex() { + static uint32 recRACurrentIterationIndex = 0; recRACurrentIterationIndex++; return recRACurrentIterationIndex; } @@ -120,7 +119,7 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml #define SUBRANGE_LIST_SIZE (128) -sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessSubrange_t* subrange, sint32 startIndex) +sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessRange* subrange, sint32 startIndex) { for (sint32 i = 0; i < subrange->list_locations.size(); i++) { @@ -135,12 +134,12 @@ sint32 
PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe { sint32 minDistance = INT_MAX; // next - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { - if (subrangeItr->range->physicalRegister != physRegister) + if (subrangeItr->GetPhysicalRegister() != physRegister) { - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } if (startIndex >= subrangeItr->start.index && startIndex < subrangeItr->end.index) @@ -149,7 +148,7 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe { minDistance = std::min(minDistance, (subrangeItr->start.index - startIndex)); } - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } return minDistance; } @@ -175,7 +174,7 @@ struct IMLRALivenessTimeline } // manually add an active range - void AddActiveRange(raLivenessSubrange_t* subrange) + void AddActiveRange(raLivenessRange* subrange) { activeRanges.emplace_back(subrange); } @@ -187,7 +186,7 @@ struct IMLRALivenessTimeline size_t count = activeRanges.size(); for (size_t f = 0; f < count; f++) { - raLivenessSubrange_t* liverange = activeRanges[f]; + raLivenessRange* liverange = activeRanges[f]; if (liverange->end.index <= instructionIndex) { #ifdef CEMU_DEBUG_ASSERT @@ -205,18 +204,18 @@ struct IMLRALivenessTimeline activeRanges.resize(count); } - std::span GetExpiredRanges() + std::span GetExpiredRanges() { return { expiredRanges.data(), expiredRanges.size() }; } - boost::container::small_vector activeRanges; + boost::container::small_vector activeRanges; private: - boost::container::small_vector expiredRanges; + boost::container::small_vector expiredRanges; }; -bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB) +bool 
IsRangeOverlapping(raLivenessRange* rangeA, raLivenessRange* rangeB) { if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index) return true; @@ -228,39 +227,40 @@ bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rang } // mark occupied registers by any overlapping range as unavailable in physRegSet -void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet) +void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet) { - for (auto& subrange : range->list_subranges) + auto clusterRanges = range2->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) { IMLSegment* imlSegment = subrange->imlSegment; - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { if (subrange == subrangeItr) { // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } if(IsRangeOverlapping(subrange, subrangeItr)) { - if (subrangeItr->range->physicalRegister >= 0) - physRegSet.SetReserved(subrangeItr->range->physicalRegister); + if (subrangeItr->GetPhysicalRegister() >= 0) + physRegSet.SetReserved(subrangeItr->GetPhysicalRegister()); } // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } } } -bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; } +bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->start.index < rhs->start.index; } void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) { - raLivenessSubrange_t* subrangeList[4096+1]; + raLivenessRange* subrangeList[4096+1]; sint32 count = 0; // disassemble linked list - 
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while (subrangeItr) { if (count >= 4096) @@ -268,7 +268,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) subrangeList[count] = subrangeItr; count++; // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } if (count == 0) { @@ -280,12 +280,12 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) // reassemble linked list subrangeList[count] = nullptr; imlSegment->raInfo.linkedList_allSubranges = subrangeList[0]; - subrangeList[0]->link_segmentSubrangesGPR.prev = nullptr; - subrangeList[0]->link_segmentSubrangesGPR.next = subrangeList[1]; + subrangeList[0]->link_allSegmentRanges.prev = nullptr; + subrangeList[0]->link_allSegmentRanges.next = subrangeList[1]; for (sint32 i = 1; i < count; i++) { - subrangeList[i]->link_segmentSubrangesGPR.prev = subrangeList[i - 1]; - subrangeList[i]->link_segmentSubrangesGPR.next = subrangeList[i + 1]; + subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1]; + subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1]; } // validate list #ifdef CEMU_DEBUG_ASSERT @@ -299,40 +299,40 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) assert_dbg(); currentStartIndex = subrangeItr->start.index; // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } if (count != count2) assert_dbg(); #endif } -std::unordered_map& IMLRA_GetSubrangeMap(IMLSegment* imlSegment) +std::unordered_map& IMLRA_GetSubrangeMap(IMLSegment* imlSegment) { - return imlSegment->raInfo.linkedList_perVirtualGPR2; + return imlSegment->raInfo.linkedList_perVirtualRegister; } -raLivenessSubrange_t* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId) +raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, 
IMLRegID regId) { - auto it = imlSegment->raInfo.linkedList_perVirtualGPR2.find(regId); - if (it == imlSegment->raInfo.linkedList_perVirtualGPR2.end()) + auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId); + if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end()) return nullptr; return it->second; } -raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) +raLivenessRange* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) { uint32 regId = regToSearch.GetRegID(); - raLivenessSubrange_t* subrangeItr = IMLRA_GetSubrange(imlSegment, regId); + raLivenessRange* subrangeItr = IMLRA_GetSubrange(imlSegment, regId); while (subrangeItr) { if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex) return subrangeItr; - subrangeItr = subrangeItr->link_sameVirtualRegisterGPR.next; + subrangeItr = subrangeItr->link_sameVirtualRegister.next; } return nullptr; } -void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessSubrange_t* subrange, sint32 instructionIndex) +void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessRange* subrange, sint32 instructionIndex) { DEBUG_BREAK; } @@ -381,42 +381,42 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon _sortSegmentAllSubrangesLinkedList(imlSegment); IMLRALivenessTimeline livenessTimeline; - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { sint32 currentIndex = subrangeItr->start.index; PPCRecRA_debugValidateSubrange(subrangeItr); livenessTimeline.ExpireRanges(std::min(currentIndex, RA_INTER_RANGE_END-1)); // expire up to currentIndex (inclusive), but exclude infinite 
ranges // if subrange already has register assigned then add it to the active list and continue - if (subrangeItr->range->physicalRegister >= 0) + if (subrangeItr->GetPhysicalRegister() >= 0) { // verify if register is actually available #ifdef CEMU_DEBUG_ASSERT for (auto& liverangeItr : livenessTimeline.activeRanges) { // check for register mismatch - cemu_assert_debug(liverangeItr->range->physicalRegister != subrangeItr->range->physicalRegister); + cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister()); } #endif livenessTimeline.AddActiveRange(subrangeItr); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; continue; } // find free register for current subrangeItr and segment - IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->range->virtualRegister); + IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister()); IMLPhysRegisterSet physRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat); cemu_assert_debug(physRegSet.HasAnyAvailable()); // register uses type with no valid pool for (auto& liverangeItr : livenessTimeline.activeRanges) { - cemu_assert_debug(liverangeItr->range->physicalRegister >= 0); - physRegSet.SetReserved(liverangeItr->range->physicalRegister); + cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0); + physRegSet.SetReserved(liverangeItr->GetPhysicalRegister()); } // check intersections with other ranges and determine allowed registers IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments) if(physRegSet.HasAnyAvailable()) { // check globally in all segments - PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr->range, physRegSet); + PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, physRegSet); } if (!physRegSet.HasAnyAvailable()) { @@ -427,7 +427,7 @@ bool 
IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon struct { sint32 distance; - raLivenessSubrange_t* largestHoleSubrange; + raLivenessRange* largestHoleSubrange; sint32 cost; // additional cost of choosing this candidate }localRangeHoleCutting; // split current range (this is generally only a good choice when the current range is long but rarely used) @@ -440,7 +440,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon // explode a inter-segment range (prefer ranges that are not read/written in this segment) struct { - raLivenessRange_t* range; + raLivenessRange* range; sint32 cost; sint32 distance; // size of hole // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange @@ -540,7 +540,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon if( distance < 2) continue; sint32 cost; - cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); + cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); // if the hole is not large enough, add cost of splitting current subrange if (distance < requiredSize) { @@ -553,7 +553,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon { spillStrategies.explodeRange.cost = cost; spillStrategies.explodeRange.distance = distance; - spillStrategies.explodeRange.range = candidate->range; + spillStrategies.explodeRange.range = candidate; } } // choose strategy @@ -581,7 +581,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon else if (subrangeItr->start.index == RA_INTER_RANGE_START) { // alternative strategy if we have no other choice: explode current range - PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr->range); + PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr); } else assert_dbg(); @@ -603,27 +603,27 @@ bool 
IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon if (candidate->end.index != RA_INTER_RANGE_END) continue; // only select candidates that clash with current subrange - if (candidate->range->physicalRegister < 0 && candidate != subrangeItr) + if (candidate->GetPhysicalRegister() < 0 && candidate != subrangeItr) continue; sint32 cost; - cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); + cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate); // compare with current best candidate for this strategy if (cost < spillStrategies.explodeRange.cost) { spillStrategies.explodeRange.cost = cost; spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = candidate->range; + spillStrategies.explodeRange.range = candidate; } } // add current range as a candidate too sint32 ownCost; - ownCost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(subrangeItr->range); + ownCost = PPCRecRARange_estimateCostAfterRangeExplode(subrangeItr); if (ownCost < spillStrategies.explodeRange.cost) { spillStrategies.explodeRange.cost = ownCost; spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = subrangeItr->range; + spillStrategies.explodeRange.range = subrangeItr; } if (spillStrategies.explodeRange.cost == INT_MAX) assert_dbg(); // should not happen @@ -632,10 +632,11 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon return false; } // assign register to range - subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg(); + //subrangeItr->SetPhysicalRegister(physRegSet.GetFirstAvailableReg()); + subrangeItr->SetPhysicalRegisterForCluster(physRegSet.GetFirstAvailableReg()); livenessTimeline.AddActiveRange(subrangeItr); // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } return true; } @@ -673,137 +674,30 @@ void 
IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* } } -struct subrangeEndingInfo_t -{ - //boost::container::small_vector subrangeList2; - raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE]; - sint32 subrangeCount; - - bool hasUndefinedEndings; -}; - -void _findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) -{ - if (depth >= 30) - { - info->hasUndefinedEndings = true; - return; - } - if (subrange->lastIterationIndex == iterationIndex) - return; // already processed - subrange->lastIterationIndex = iterationIndex; - if (subrange->hasStoreDelayed) - return; // no need to traverse this subrange - IMLSegment* imlSegment = subrange->imlSegment; - if (subrange->end.index != RA_INTER_RANGE_END) - { - // ending segment - if (info->subrangeCount >= SUBRANGE_LIST_SIZE) - { - info->hasUndefinedEndings = true; - return; - } - else - { - info->subrangeList[info->subrangeCount] = subrange; - info->subrangeCount++; - } - return; - } - - // traverse next subranges in flow - if (imlSegment->nextSegmentBranchNotTaken) - { - if (subrange->subrangeBranchNotTaken == nullptr) - { - info->hasUndefinedEndings = true; - } - else - { - _findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info); - } - } - if (imlSegment->nextSegmentBranchTaken) - { - if (subrange->subrangeBranchTaken == nullptr) - { - info->hasUndefinedEndings = true; - } - else - { - _findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info); - } - } -} - -void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange) -{ - if (subrange->end.index != RA_INTER_RANGE_END) - return; - // analyze data flow across segments (if this segment has writes) - if (subrange->hasStore) - { - subrangeEndingInfo_t writeEndingInfo; - writeEndingInfo.subrangeCount = 0; - writeEndingInfo.hasUndefinedEndings = false; - _findSubrangeWriteEndings(subrange, 
PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo); - if (writeEndingInfo.hasUndefinedEndings == false) - { - // get cost of delaying store into endings - sint32 delayStoreCost = 0; - bool alreadyStoredInAllEndings = true; - for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) - { - raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i]; - if( subrangeItr->hasStore ) - continue; // this ending already stores, no extra cost - alreadyStoredInAllEndings = false; - sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment); - delayStoreCost = std::max(storeCost, delayStoreCost); - } - if (alreadyStoredInAllEndings) - { - subrange->hasStore = false; - subrange->hasStoreDelayed = true; - } - else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment)) - { - subrange->hasStore = false; - subrange->hasStoreDelayed = true; - for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) - { - raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i]; - subrangeItr->hasStore = true; - } - } - } - } -} - inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) { return IMLReg(baseFormat, baseFormat, 0, regId); } -void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span loadList) +void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span loadList) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadList.size()); for (sint32 i = 0; i < loadList.size(); i++) { - IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->range->virtualRegister]; + IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->GetVirtualRegister()]; cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); - imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->range->physicalRegister), loadList[i]->range->name); + 
imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->GetPhysicalRegister()), loadList[i]->GetName()); } } -void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span storeList) +void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span storeList) { PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeList.size()); for (size_t i = 0; i < storeList.size(); i++) { - IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->range->virtualRegister]; + IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->GetVirtualRegister()]; cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); - imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->range->name, _MakeNativeReg(baseFormat, storeList[i]->range->physicalRegister)); + imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->GetName(), _MakeNativeReg(baseFormat, storeList[i]->GetPhysicalRegister())); } } @@ -814,7 +708,7 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 
1 : 0; // load register ranges that are supplied from previous segments - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { if (subrangeItr->start.index == RA_INTER_RANGE_START) @@ -827,12 +721,12 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML assert_dbg(); } // update translation table - cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->range->virtualRegister)); + cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->GetVirtualRegister())); #endif - virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); } // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } // process instructions while(index < imlSegment->imlList.size() + 1) @@ -842,7 +736,7 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML for (auto& expiredRange : livenessTimeline.GetExpiredRanges()) { // update translation table - virtId2PhysRegIdMap.erase(expiredRange->range->virtualRegister); + virtId2PhysRegIdMap.erase(expiredRange->GetVirtualRegister()); // store GPR if required // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (expiredRange->hasStore) @@ -874,9 +768,9 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML subrangeItr->start.index--; } // update translation table - virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.insert_or_assign(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); } - subrangeItr = 
subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } // rewrite registers if (index < imlSegment->imlList.size()) @@ -885,12 +779,12 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML index++; } // expire infinite subranges (subranges which cross the segment border) - std::vector loadStoreList; + std::vector loadStoreList; livenessTimeline.ExpireRanges(RA_INTER_RANGE_END); for (auto liverange : livenessTimeline.GetExpiredRanges()) { // update translation table - virtId2PhysRegIdMap.erase(liverange->range->virtualRegister); + virtId2PhysRegIdMap.erase(liverange->GetVirtualRegister()); // store GPR if (liverange->hasStore) loadStoreList.emplace_back(liverange); @@ -910,10 +804,10 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML if (subrangeItr->_noLoad == false) loadStoreList.emplace_back(subrangeItr); // update translation table - virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); + virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister()); } // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; + subrangeItr = subrangeItr->link_allSegmentRanges.next; } if (!loadStoreList.empty()) PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList); @@ -1026,7 +920,7 @@ void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) } } -raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) +raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name) { IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR); if (!abstractRange) @@ -1034,7 +928,7 @@ raLivenessSubrange_t* 
PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext if (abstractRange->isProcessed) { // return already existing segment - raLivenessSubrange_t* existingRange = IMLRA_GetSubrange(imlSegment, vGPR); + raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR); cemu_assert_debug(existingRange); return existingRange; } @@ -1043,7 +937,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext #ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr); #endif - raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, range, imlSegment, abstractRange->usageStart, abstractRange->usageEnd); + raLivenessRange* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, imlSegment, vGPR, name, abstractRange->usageStart, abstractRange->usageEnd); // traverse forward if (abstractRange->usageEnd == RA_INTER_RANGE_END) { @@ -1052,7 +946,8 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR); if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START) { - subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, range); + subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name); + subrange->subrangeBranchTaken->previousRanges.push_back(subrange); cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); } } @@ -1061,7 +956,8 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR); if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START) { - subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, 
imlSegment->nextSegmentBranchNotTaken, vGPR, range); + subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name); + subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange); cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); } } @@ -1075,7 +971,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext if(!prevRange) continue; if (prevRange->usageEnd == RA_INTER_RANGE_END) - PPCRecRA_convertToMappedRanges(ctx, it, vGPR, range); + PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name); } } // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction @@ -1100,13 +996,12 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML if(it.second.isProcessed) continue; IMLRegID regId = it.first; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.raParam->regIdToName.find(regId)->second); - PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range); + PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second); } // fill created ranges with read/write location indices // note that at this point there is only one range per register per segment // and the algorithm below relies on this - const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); + const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); size_t index = 0; IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) @@ -1114,7 +1009,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { IMLRegID gprId = gprReg.GetRegID(); - raLivenessSubrange_t* subrange = regToSubrange.find(gprId)->second; + raLivenessRange* 
subrange = regToSubrange.find(gprId)->second; PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT if ((sint32)index < subrange->start.index) @@ -1351,7 +1246,7 @@ void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ct IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt); } -void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) +void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessRange* subrange) { bool isRead = false; bool isWritten = false; @@ -1376,23 +1271,135 @@ void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) subrange->_noLoad = true; } -void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) + +struct subrangeEndingInfo_t { - // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore - // first do a per-subrange pass - for (auto& range : ppcImlGenContext->raInfo.list_ranges) + //boost::container::small_vector subrangeList2; + raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE]; + sint32 subrangeCount; + + bool hasUndefinedEndings; +}; + +void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) +{ + if (depth >= 30) { - for (auto& subrange : range->list_subranges) + info->hasUndefinedEndings = true; + return; + } + if (subrange->lastIterationIndex == iterationIndex) + return; // already processed + subrange->lastIterationIndex = iterationIndex; + if (subrange->hasStoreDelayed) + return; // no need to traverse this subrange + IMLSegment* imlSegment = subrange->imlSegment; + if (subrange->end.index != RA_INTER_RANGE_END) + { + // ending segment + if (info->subrangeCount >= SUBRANGE_LIST_SIZE) { - PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); + info->hasUndefinedEndings = true; + return; + } + else + { + info->subrangeList[info->subrangeCount] = subrange; + info->subrangeCount++; + } + 
return; + } + + // traverse next subranges in flow + if (imlSegment->nextSegmentBranchNotTaken) + { + if (subrange->subrangeBranchNotTaken == nullptr) + { + info->hasUndefinedEndings = true; + } + else + { + _findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info); } } - // then do a second pass where we scan along subrange flow - for (auto& range : ppcImlGenContext->raInfo.list_ranges) + if (imlSegment->nextSegmentBranchTaken) { - for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm + if (subrange->subrangeBranchTaken == nullptr) + { + info->hasUndefinedEndings = true; + } + else + { + _findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info); + } + } +} + +static void _analyzeRangeDataFlow(raLivenessRange* subrange) +{ + if (subrange->end.index != RA_INTER_RANGE_END) + return; + // analyze data flow across segments (if this segment has writes) + if (subrange->hasStore) + { + subrangeEndingInfo_t writeEndingInfo; + writeEndingInfo.subrangeCount = 0; + writeEndingInfo.hasUndefinedEndings = false; + _findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo); + if (writeEndingInfo.hasUndefinedEndings == false) + { + // get cost of delaying store into endings + sint32 delayStoreCost = 0; + bool alreadyStoredInAllEndings = true; + for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) + { + raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i]; + if( subrangeItr->hasStore ) + continue; // this ending already stores, no extra cost + alreadyStoredInAllEndings = false; + sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment); + delayStoreCost = std::max(storeCost, delayStoreCost); + } + if (alreadyStoredInAllEndings) + { + subrange->hasStore = false; + subrange->hasStoreDelayed = true; + } + else if (delayStoreCost <= 
PPCRecRARange_getReadWriteCost(subrange->imlSegment)) + { + subrange->hasStore = false; + subrange->hasStoreDelayed = true; + for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) + { + raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i]; + subrangeItr->hasStore = true; + } + } + } + } +} + +void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) +{ + // this function is called after _assignRegisters(), which means that all liveness ranges are already final and must not be changed anymore + // in the first pass we track read/write dependencies + for(auto& seg : ppcImlGenContext->segmentList2) + { + raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; + while(subrange) + { + PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); + subrange = subrange->link_allSegmentRanges.next; + } + } + // then we do a second pass where we scan along subrange flow + for(auto& seg : ppcImlGenContext->segmentList2) + { + raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; + while(subrange) { _analyzeRangeDataFlow(subrange); + subrange = subrange->link_allSegmentRanges.next; } } } @@ -1407,8 +1414,6 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment - ppcImlGenContext->raInfo.list_ranges = std::vector(); - ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); IMLRA_CalculateLivenessRanges(ctx); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index f722e7ca..602cdfa7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -3,45 +3,110 @@ #include "IMLRegisterAllocatorRanges.h" #include "util/helpers/MemoryPool.h" -void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessSubrange_t* 
subrange) +uint32 PPCRecRA_getNextIterationIndex(); + +IMLRegID raLivenessRange::GetVirtualRegister() const { - IMLRegID regId = subrange->range->virtualRegister; + return virtualRegister; +} + +sint32 raLivenessRange::GetPhysicalRegister() const +{ + return physicalRegister; +} + +IMLName raLivenessRange::GetName() const +{ + return name; +} + +void raLivenessRange::SetPhysicalRegister(sint32 physicalRegister) +{ + cemu_assert_suspicious(); // not used yet + this->physicalRegister = physicalRegister; +} + +void raLivenessRange::SetPhysicalRegisterForCluster(sint32 physicalRegister) +{ + auto clusterRanges = GetAllSubrangesInCluster(); + for(auto& range : clusterRanges) + range->physicalRegister = physicalRegister; +} + +boost::container::small_vector raLivenessRange::GetAllSubrangesInCluster() +{ + uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); + boost::container::small_vector subranges; + subranges.push_back(this); + this->lastIterationIndex = iterationIndex; + size_t i = 0; + while(i<subranges.size()) + { + raLivenessRange* cur = subranges[i]; + i++; + // check successors + if(cur->subrangeBranchTaken && cur->subrangeBranchTaken->lastIterationIndex != iterationIndex) + { + cur->subrangeBranchTaken->lastIterationIndex = iterationIndex; + subranges.push_back(cur->subrangeBranchTaken); + } + if(cur->subrangeBranchNotTaken && cur->subrangeBranchNotTaken->lastIterationIndex != iterationIndex) + { + cur->subrangeBranchNotTaken->lastIterationIndex = iterationIndex; + subranges.push_back(cur->subrangeBranchNotTaken); + } + // check predecessors + for(auto& prev : cur->previousRanges) + { + if(prev->lastIterationIndex != iterationIndex) + { + prev->lastIterationIndex = iterationIndex; + subranges.push_back(prev); + } + } + } + return subranges; +} + +void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) +{ + IMLRegID regId = subrange->GetVirtualRegister(); auto it = root.find(regId); if (it == root.end()) { // new single element root.try_emplace(regId, subrange); - subrange->link_sameVirtualRegisterGPR.prev = nullptr; - 
subrange->link_sameVirtualRegisterGPR.next = nullptr; + subrange->link_sameVirtualRegister.prev = nullptr; + subrange->link_sameVirtualRegister.next = nullptr; } else { // insert in first position - subrange->link_sameVirtualRegisterGPR.next = it->second; + subrange->link_sameVirtualRegister.next = it->second; it->second = subrange; - subrange->link_sameVirtualRegisterGPR.prev = subrange; + subrange->link_sameVirtualRegister.prev = subrange; } } -void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) +void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange) { - subrange->link_segmentSubrangesGPR.next = *root; + subrange->link_allSegmentRanges.next = *root; if (*root) - (*root)->link_segmentSubrangesGPR.prev = subrange; - subrange->link_segmentSubrangesGPR.prev = nullptr; + (*root)->link_allSegmentRanges.prev = subrange; + subrange->link_allSegmentRanges.prev = nullptr; *root = subrange; } -void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessSubrange_t* subrange) +void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) { - IMLRegID regId = subrange->range->virtualRegister; - raLivenessSubrange_t* nextRange = subrange->link_sameVirtualRegisterGPR.next; - raLivenessSubrange_t* prevRange = subrange->link_sameVirtualRegisterGPR.prev; - raLivenessSubrange_t* newBase = prevRange ? prevRange : nextRange; + IMLRegID regId = subrange->GetVirtualRegister(); + raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next; + raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev; + raLivenessRange* newBase = prevRange ? 
prevRange : nextRange; if (prevRange) - prevRange->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next; + prevRange->link_sameVirtualRegister.next = subrange->link_sameVirtualRegister.next; if (nextRange) - nextRange->link_sameVirtualRegisterGPR.prev = subrange->link_sameVirtualRegisterGPR.prev; + nextRange->link_sameVirtualRegister.prev = subrange->link_sameVirtualRegister.prev; if (!prevRange) { @@ -55,81 +120,78 @@ void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_maplink_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1; - subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1; + subrange->link_sameVirtualRegister.prev = (raLivenessRange*)1; + subrange->link_sameVirtualRegister.next = (raLivenessRange*)1; #endif } -void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) +void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange) { - raLivenessSubrange_t* tempPrev = subrange->link_segmentSubrangesGPR.prev; - if (subrange->link_segmentSubrangesGPR.prev) - subrange->link_segmentSubrangesGPR.prev->link_segmentSubrangesGPR.next = subrange->link_segmentSubrangesGPR.next; + raLivenessRange* tempPrev = subrange->link_allSegmentRanges.prev; + if (subrange->link_allSegmentRanges.prev) + subrange->link_allSegmentRanges.prev->link_allSegmentRanges.next = subrange->link_allSegmentRanges.next; else - (*root) = subrange->link_segmentSubrangesGPR.next; - if (subrange->link_segmentSubrangesGPR.next) - subrange->link_segmentSubrangesGPR.next->link_segmentSubrangesGPR.prev = tempPrev; + (*root) = subrange->link_allSegmentRanges.next; + if (subrange->link_allSegmentRanges.next) + subrange->link_allSegmentRanges.next->link_allSegmentRanges.prev = tempPrev; #ifdef CEMU_DEBUG_ASSERT - subrange->link_segmentSubrangesGPR.prev = (raLivenessSubrange_t*)1; - subrange->link_segmentSubrangesGPR.next = (raLivenessSubrange_t*)1; + 
subrange->link_allSegmentRanges.prev = (raLivenessRange*)1; + subrange->link_allSegmentRanges.next = (raLivenessRange*)1; #endif } -MemoryPoolPermanentObjects memPool_livenessRange(4096); -MemoryPoolPermanentObjects memPool_livenessSubrange(4096); +MemoryPoolPermanentObjects memPool_livenessSubrange(4096); -raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name) +raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex) { - raLivenessRange_t* livenessRange = memPool_livenessRange.acquireObj(); - livenessRange->list_subranges.resize(0); - livenessRange->virtualRegister = virtualRegister; - livenessRange->name = name; - livenessRange->physicalRegister = -1; - ppcImlGenContext->raInfo.list_ranges.push_back(livenessRange); - return livenessRange; -} - -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex) -{ - raLivenessSubrange_t* livenessSubrange = memPool_livenessSubrange.acquireObj(); - livenessSubrange->list_locations.resize(0); - livenessSubrange->range = range; - livenessSubrange->imlSegment = imlSegment; - PPCRecompilerIml_setSegmentPoint(&livenessSubrange->start, imlSegment, startIndex); - PPCRecompilerIml_setSegmentPoint(&livenessSubrange->end, imlSegment, endIndex); + raLivenessRange* range = memPool_livenessSubrange.acquireObj(); + range->previousRanges.clear(); + range->list_locations.resize(0); + range->imlSegment = imlSegment; + PPCRecompilerIml_setSegmentPoint(&range->start, imlSegment, startIndex); + PPCRecompilerIml_setSegmentPoint(&range->end, imlSegment, endIndex); + // register mapping + range->virtualRegister = virtualRegister; + range->name = name; + range->physicalRegister = -1; // default values - livenessSubrange->hasStore = false; - 
livenessSubrange->hasStoreDelayed = false; - livenessSubrange->lastIterationIndex = 0; - livenessSubrange->subrangeBranchNotTaken = nullptr; - livenessSubrange->subrangeBranchTaken = nullptr; - livenessSubrange->_noLoad = false; - // add to range - range->list_subranges.push_back(livenessSubrange); - // add to segment - PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, livenessSubrange); - PPCRecRARange_addLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, livenessSubrange); - return livenessSubrange; + range->hasStore = false; + range->hasStoreDelayed = false; + range->lastIterationIndex = 0; + range->subrangeBranchNotTaken = nullptr; + range->subrangeBranchTaken = nullptr; + range->_noLoad = false; + // add to segment linked lists + PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range); + PPCRecRARange_addLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range); + return range; } -void _unlinkSubrange(raLivenessSubrange_t* subrange) +void _unlinkSubrange(raLivenessRange* subrange) { IMLSegment* imlSegment = subrange->imlSegment; - PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, subrange); - PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange); + PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, subrange); + PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, subrange); } -void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) +void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { _unlinkSubrange(subrange); - subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); + 
//subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); subrange->list_locations.clear(); + // unlink reverse references (remove this range from the previousRanges list of each successor) + if(subrange->subrangeBranchTaken) + subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange)); + if(subrange->subrangeBranchNotTaken) + subrange->subrangeBranchNotTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange)); + PPCRecompilerIml_removeSegmentPoint(&subrange->start); PPCRecompilerIml_removeSegmentPoint(&subrange->end); memPool_livenessSubrange.releaseObj(subrange); } -void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) +// leaves range and linked ranges in invalid state. Only use at final clean up when no range is going to be accessed anymore +void _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { _unlinkSubrange(subrange); PPCRecompilerIml_removeSegmentPoint(&subrange->start); @@ -137,49 +199,30 @@ void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenCont memPool_livenessSubrange.releaseObj(subrange); } -void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) +void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) { - for (auto& subrange : range->list_subranges) + auto clusterRanges = subrange->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) { - _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange); + _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, subrange); } - ppcImlGenContext->raInfo.list_ranges.erase(std::find(ppcImlGenContext->raInfo.list_ranges.begin(), 
ppcImlGenContext->raInfo.list_ranges.end(), range)); - memPool_livenessRange.releaseObj(range); -} - -void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) -{ - for (auto& subrange : range->list_subranges) - { - _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange); - } - memPool_livenessRange.releaseObj(range); } void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) { - for(auto& range : ppcImlGenContext->raInfo.list_ranges) + for(auto& seg : ppcImlGenContext->segmentList2) { - PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext, range); + raLivenessRange* cur; + while(cur = seg->raInfo.linkedList_allSubranges) + { + _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, cur); + } + seg->raInfo.linkedList_allSubranges = nullptr; + seg->raInfo.linkedList_perVirtualRegister.clear(); } - ppcImlGenContext->raInfo.list_ranges.clear(); } -void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange) -{ - cemu_assert_debug(range != absorbedRange); - cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister); - // move all subranges from absorbedRange to range - for (auto& subrange : absorbedRange->list_subranges) - { - range->list_subranges.push_back(subrange); - subrange->range = range; - } - absorbedRange->list_subranges.clear(); - PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange); -} - -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange) +void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange) { #ifdef CEMU_DEBUG_ASSERT PPCRecRA_debugValidateSubrange(subrange); @@ -193,6 +236,12 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSub if (subrange == absorbedSubrange) assert_dbg(); #endif + + // update references + 
if(absorbedSubrange->subrangeBranchTaken) + *std::find(absorbedSubrange->subrangeBranchTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange; + if(absorbedSubrange->subrangeBranchNotTaken) + *std::find(absorbedSubrange->subrangeBranchNotTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange; subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken; @@ -210,29 +259,27 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSub PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange); } -// remove all inter-segment connections from the range and split it into local ranges (also removes empty ranges) -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) +// remove all inter-segment connections from the range cluster and split it into local ranges (also removes empty ranges) +void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange) { - if (range->list_subranges.size() == 1) - assert_dbg(); - for (auto& subrange : range->list_subranges) + auto clusterRanges = originRange->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) { if (subrange->list_locations.empty()) continue; - raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name); - raLivenessSubrange_t* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, newRange, subrange->imlSegment, subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1); + raLivenessRange* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), subrange->list_locations.data()[0].index, 
subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1); // copy locations for (auto& location : subrange->list_locations) { newSubrange->list_locations.push_back(location); } } - // remove original range - PPCRecRA_deleteRange(ppcImlGenContext, range); + // remove subranges + PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange); } #ifdef CEMU_DEBUG_ASSERT -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) +void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange) { // validate subrange if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken) @@ -252,7 +299,7 @@ void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {} // The return value is the tail subrange // If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations // Ranges that begin at RA_INTER_RANGE_START are allowed and can be split -raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole) +raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole) { // validation #ifdef CEMU_DEBUG_ASSERT @@ -266,8 +313,7 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC assert_dbg(); #endif // create tail - raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name); - raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index); + raLivenessRange* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), splitIndex, subrange->end.index); // copy locations for (auto& location : 
subrange->list_locations) { @@ -312,7 +358,7 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC return tailSubrange; } -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite) +void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite) { if (subrange->list_locations.empty()) { @@ -339,13 +385,12 @@ sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment) return v*v; // 25, 100, 225, 400 } -// calculate cost of entire range -// ignores data flow and does not detect avoidable reads/stores -sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range) +// calculate cost of entire range cluster +sint32 PPCRecRARange_estimateTotalCost(std::span ranges) { sint32 cost = 0; - // todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code). + // todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code). 
// currently we calculate the cost based on the most expensive entry/exit point @@ -354,7 +399,7 @@ sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range) sint32 readCount = 0; sint32 writeCount = 0; - for (auto& subrange : range->list_subranges) + for (auto& subrange : ranges) { if (subrange->start.index != RA_INTER_RANGE_START) { @@ -375,10 +420,11 @@ sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range) } // calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it -sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range) +sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange) { - sint32 cost = -PPCRecRARange_estimateCost(range); - for (auto& subrange : range->list_subranges) + auto ranges = subrange->GetAllSubrangesInCluster(); + sint32 cost = -PPCRecRARange_estimateTotalCost(ranges); + for (auto& subrange : ranges) { if (subrange->list_locations.empty()) continue; @@ -387,7 +433,7 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* return cost; } -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex) +sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex) { // validation #ifdef CEMU_DEBUG_ASSERT diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h index 28fbe906..31deaab3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -1,26 +1,77 @@ #pragma once -raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name); -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex); -void 
PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange); -void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); +struct raLivenessLocation_t +{ + sint32 index; + bool isRead; + bool isWrite; + + raLivenessLocation_t() = default; + + raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) + : index(index), isRead(isRead), isWrite(isWrite) {}; +}; + +struct raLivenessSubrangeLink +{ + struct raLivenessRange* prev; + struct raLivenessRange* next; +}; + +struct raLivenessRange +{ + IMLSegment* imlSegment; + IMLSegmentPoint start; + IMLSegmentPoint end; + // dirty state tracking + bool _noLoad; + bool hasStore; + bool hasStoreDelayed; + // next + raLivenessRange* subrangeBranchTaken; + raLivenessRange* subrangeBranchNotTaken; + // reverse counterpart of BranchTaken/BranchNotTaken + boost::container::small_vector previousRanges; + // processing + uint32 lastIterationIndex; + // instruction locations + std::vector list_locations; + // linked list (subranges with same GPR virtual register) + raLivenessSubrangeLink link_sameVirtualRegister; + // linked list (all subranges for this segment) + raLivenessSubrangeLink link_allSegmentRanges; + // register mapping (constant) + IMLRegID virtualRegister; + IMLName name; + // register allocator result + sint32 physicalRegister; + + boost::container::small_vector GetAllSubrangesInCluster(); + + IMLRegID GetVirtualRegister() const; + sint32 GetPhysicalRegister() const; + IMLName GetName() const; + void SetPhysicalRegister(sint32 physicalRegister); + void SetPhysicalRegisterForCluster(sint32 physicalRegister); +}; + +raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex); +void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange); void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); 
-void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange); -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); +void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange); -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange); +void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange); -raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false); +raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole = false); -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite); -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange); +void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite); +void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); // cost estimation sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment); -sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range); -sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range); -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex); +sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange); +sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex); // special values to mark the index of ranges that reach across the segment border #define RA_INTER_RANGE_START (-1) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index f0420b01..0589d660 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,6 +1,8 @@ #pragma once #include "IMLInstruction.h" +#include + struct IMLSegmentPoint { sint32 index; @@ -9,63 +11,14 @@ struct IMLSegmentPoint IMLSegmentPoint* prev; }; -struct raLivenessLocation_t -{ - sint32 index; - bool isRead; - bool isWrite; - - raLivenessLocation_t() = default; - - raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) - : index(index), isRead(isRead), isWrite(isWrite) {}; -}; - -struct raLivenessSubrangeLink_t -{ - struct raLivenessSubrange_t* prev; - struct raLivenessSubrange_t* next; -}; - -struct raLivenessSubrange_t -{ - struct raLivenessRange_t* range; - IMLSegment* imlSegment; - IMLSegmentPoint start; - IMLSegmentPoint end; - // dirty state tracking - bool _noLoad; - bool hasStore; - bool hasStoreDelayed; - // next - raLivenessSubrange_t* subrangeBranchTaken; - raLivenessSubrange_t* subrangeBranchNotTaken; - // processing - uint32 lastIterationIndex; - // instruction locations - std::vector list_locations; - // linked list (subranges with same GPR virtual register) - raLivenessSubrangeLink_t link_sameVirtualRegisterGPR; - // linked list (all subranges for this segment) - raLivenessSubrangeLink_t link_segmentSubrangesGPR; -}; - -struct raLivenessRange_t -{ - IMLRegID virtualRegister; - sint32 physicalRegister; - IMLName name; - std::vector list_subranges; -}; - struct PPCSegmentRegisterAllocatorInfo_t { // used during loop detection bool isPartOfProcessedLoop{}; sint32 lastIterationIndex{}; // linked lists - raLivenessSubrange_t* linkedList_allSubranges{}; - std::unordered_map linkedList_perVirtualGPR2; + struct raLivenessRange* linkedList_allSubranges{}; + std::unordered_map linkedList_perVirtualRegister; }; struct IMLSegment diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp 
b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 25a2c163..846426f5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -16,6 +16,7 @@ #include "IML/IML.h" #include "IML/IMLRegisterAllocator.h" #include "BackendX64/BackendX64.h" +#include "util/highresolutiontimer/HighResolutionTimer.h" struct PPCInvalidationRange { @@ -157,6 +158,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; + BenchmarkTimer bt; + bt.Start(); + // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; ppcImlGenContext.debug_entryPPCAddress = range.startAddress; @@ -240,9 +244,18 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } + bt.Stop(); + //cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); - cemuLog_logDebug(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code); + uint32 codeHash = 0; + for (uint32 i = 0; i < ppcRecFunc->x86Size; i++) + { + codeHash = _rotr(codeHash, 3); + codeHash += ((uint8*)ppcRecFunc->x86Code)[i]; + } + + //cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash); return ppcRecFunc; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 94b3fcd9..706855d4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ 
b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -54,11 +54,6 @@ struct ppcImlGenContext_t std::vector segmentList2; // code generation control bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode - // register allocator info - struct - { - std::vector list_ranges; - }raInfo; // analysis info struct {