#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "PPCRecompilerX64.h"
#include "PPCRecompilerImlRanges.h"

// Register allocator for the PPC recompiler IML.
// Pipeline (see PPCRecompilerImm_allocateRegisters at the bottom of this file):
//   prepare segments + detect loops -> calculate liveness ranges -> assign physical registers
//   -> analyze data flow -> insert load/store instructions and rewrite register operands -> delete ranges

void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]);

bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml);

// monotonically increasing counter used to tag segments/subranges as "visited during traversal N"
uint32 recRACurrentIterationIndex = 0;

// Returns a fresh traversal id (pre-incremented, so the first id handed out is 1).
uint32 PPCRecRA_getNextIterationIndex()
{
	recRACurrentIterationIndex++;
	return recRACurrentIterationIndex;
}

// Follows forward edges (successors with a larger momentaryIndex) starting at currentSegment and
// reports whether imlSegmentLoopBase can be reached. Every visited segment that lies on such a path
// gets its loopDepth incremented once per traversal (iterationIndex is used for memoization).
// The search gives up (returns false) at recursion depth 9 or when the successor is uncertain.
bool _detectLoop(PPCRecImlSegment_t* currentSegment, sint32 depth, uint32 iterationIndex, PPCRecImlSegment_t* imlSegmentLoopBase)
{
	if (currentSegment == imlSegmentLoopBase)
		return true;
	// already visited during this traversal -> reuse cached result
	if (currentSegment->raInfo.lastIterationIndex == iterationIndex)
		return currentSegment->raInfo.isPartOfProcessedLoop;
	if (depth >= 9)
		return false;
	currentSegment->raInfo.lastIterationIndex = iterationIndex;
	currentSegment->raInfo.isPartOfProcessedLoop = false;

	if (currentSegment->nextSegmentIsUncertain)
		return false;
	// The segment is part of the loop if EITHER successor leads back to the loop base.
	// Both successors are always traversed so that every segment on a looping path is counted.
	// (Previously the result of the branch-taken path overwrote the result of the not-taken path.)
	PPCRecImlSegment_t* segmentNotTaken = currentSegment->nextSegmentBranchNotTaken;
	if (segmentNotTaken && segmentNotTaken->momentaryIndex > currentSegment->momentaryIndex)
	{
		if (_detectLoop(segmentNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase))
			currentSegment->raInfo.isPartOfProcessedLoop = true;
	}
	PPCRecImlSegment_t* segmentTaken = currentSegment->nextSegmentBranchTaken;
	if (segmentTaken && segmentTaken->momentaryIndex > currentSegment->momentaryIndex)
	{
		if (_detectLoop(segmentTaken, depth + 1, iterationIndex, imlSegmentLoopBase))
			currentSegment->raInfo.isPartOfProcessedLoop = true;
	}
	if (currentSegment->raInfo.isPartOfProcessedLoop)
		currentSegment->loopDepth++;
	return currentSegment->raInfo.isPartOfProcessedLoop;
}

// Starts a loop search at the branch target of imlSegmentLoopBase. If the target can reach
// imlSegmentLoopBase again, the base segment's loopDepth is incremented as well.
void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegmentLoopBase)
{
	// nothing to follow without a branch target (the caller in this file already guarantees one)
	if (imlSegmentLoopBase->nextSegmentBranchTaken == nullptr)
		return;
	uint32 iterationIndex = PPCRecRA_getNextIterationIndex();
	imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex;
	if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase))
	{
		imlSegmentLoopBase->loopDepth++;
	}
}

// Updates loopDepth for loops that are closed by imlSegment's taken branch.
void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	if (imlSegment->nextSegmentIsUncertain)
		return;
	// check if this segment has a branch that links to itself (tight loop)
	if (imlSegment->nextSegmentBranchTaken == imlSegment)
	{
		// segment loops over itself
		imlSegment->loopDepth++;
		return;
	}
	// check if this segment has a branch that goes backwards (potential complex loop)
	if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex)
	{
		PPCRecRA_detectLoop(ppcImlGenContext, imlSegment);
	}
}

typedef struct
{
	sint32 name;
	sint32 virtualRegister;
	sint32 physicalRegister;
	bool isDirty;
}raRegisterState_t;

const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS;

// Returns the state entry that currently holds virtualRegister, or nullptr if there is none.
// regState must have at least _raInfo_physicalGPRCount entries.
raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister)
{
	for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++)
	{
		if (regState[i].virtualRegister == virtualRegister)
		{
#ifndef PUBLIC_RELEASE
			if (regState[i].physicalRegister < 0)
				assert_dbg();
#endif
			return regState + i;
		}
	}
	return nullptr;
}

// Claims the first entry whose physicalRegister is negative (unused), sets its physicalRegister
// to the entry index and returns it. Returns nullptr if all entries are in use.
raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState)
{
	for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++)
	{
		if (regState[i].physicalRegister < 0)
		{
			regState[i].physicalRegister = i;
			return regState + i;
		}
	}
	return nullptr;
}

typedef struct
{
	uint16 registerIndex;
	uint16 registerName;
}raLoadStoreInfo_t;

// Inserts a single R_NAME/ASSIGN instruction (register <- name) at insertIndex.
void PPCRecRA_insertGPRLoadInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName)
{
	PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1);
	PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + 0);
	memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t));
	imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME;
	imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
	imlInstructionItr->op_r_name.registerIndex = registerIndex;
	imlInstructionItr->op_r_name.name = registerName;
	imlInstructionItr->op_r_name.copyWidth = 32;
	imlInstructionItr->op_r_name.flags = 0;
}

// Inserts loadCount R_NAME/ASSIGN instructions (register <- name) starting at insertIndex,
// one per entry of loadList and in the same order.
void PPCRecRA_insertGPRLoadInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* loadList, sint32 loadCount)
{
	PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadCount);
	memset(imlSegment->imlList + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*loadCount);
	for (sint32 i = 0; i < loadCount; i++)
	{
		PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + i);
		imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME;
		imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
		imlInstructionItr->op_r_name.registerIndex = (uint8)loadList[i].registerIndex;
		imlInstructionItr->op_r_name.name = (uint32)loadList[i].registerName;
		imlInstructionItr->op_r_name.copyWidth = 32;
		imlInstructionItr->op_r_name.flags = 0;
	}
}

// Inserts a single NAME_R/ASSIGN instruction (name <- register) at insertIndex.
void PPCRecRA_insertGPRStoreInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName)
{
	PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1);
	PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + 0);
	memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t));
	imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R;
	imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
	imlInstructionItr->op_r_name.registerIndex = registerIndex;
	imlInstructionItr->op_r_name.name = registerName;
	imlInstructionItr->op_r_name.copyWidth = 32;
	imlInstructionItr->op_r_name.flags = 0;
}

// Inserts storeCount NAME_R/ASSIGN instructions (name <- register) starting at insertIndex,
// one per entry of storeList and in the same order.
void PPCRecRA_insertGPRStoreInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* storeList, sint32 storeCount)
{
	PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeCount);
	// the whole inserted span is zeroed here, no per-instruction memset is required afterwards
	memset(imlSegment->imlList + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*storeCount);
	for (sint32 i = 0; i < storeCount; i++)
	{
		PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + i);
		imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R;
		imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
		imlInstructionItr->op_r_name.registerIndex = (uint8)storeList[i].registerIndex;
		imlInstructionItr->op_r_name.name = (uint32)storeList[i].registerName;
		imlInstructionItr->op_r_name.copyWidth = 32;
		imlInstructionItr->op_r_name.flags = 0;
	}
}

#define SUBRANGE_LIST_SIZE	(128)

// Returns the distance from startIndex to the first entry (in list order) of subrange->list_locations
// whose index is >= startIndex, or INT_MAX if there is no such entry.
// NOTE(review): presumably list_locations is sorted ascending by index - confirm against the code that fills it.
sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessSubrange_t* subrange, sint32 startIndex)
{
	const sint32 locationCount = (sint32)subrange->list_locations.size();
	const auto* locations = subrange->list_locations.data();
	for (sint32 i = 0; i < locationCount; i++)
	{
		if (locations[i].index >= startIndex)
			return locations[i].index - startIndex;
	}
	return INT_MAX;
}

// count how many instructions there are until physRegister is used by any subrange (returns 0 if register is in use at startIndex, and INT_MAX if not used for the remainder of the segment)
sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 physRegister)
{
	sint32 minDistance = INT_MAX;
	for (raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; subrangeItr; subrangeItr = subrangeItr->link_segmentSubrangesGPR.next)
	{
		if (subrangeItr->range->physicalRegister != physRegister)
			continue;
		// register is occupied at startIndex
		if (startIndex >= subrangeItr->start.index && startIndex < subrangeItr->end.index)
			return 0;
		// register becomes occupied later on
		if (subrangeItr->start.index >= startIndex)
			minDistance = std::min(minDistance, (subrangeItr->start.index - startIndex));
	}
	return minDistance;
}

// working set of subranges that are live at the currently processed instruction index
typedef struct
{
	raLivenessSubrange_t* liveRangeList[64];
	sint32 liveRangesCount;
}raLiveRangeInfo_t;

// return a bitmask that contains only registers that are not used by any colliding range
// (all subranges of the given range are checked against all other subranges of their segment)
uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range)
{
	uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1;
	for (auto& subrange : range->list_subranges)
	{
		PPCRecImlSegment_t* imlSegment = subrange->imlSegment;
		for (raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; subrangeItr; subrangeItr = subrangeItr->link_segmentSubrangesGPR.next)
		{
			if (subrange == subrangeItr)
				continue;
			// collision: the intervals overlap, or both start at the segment entry, or both end at the segment exit
			const bool intervalsOverlap = (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index);
			const bool bothStartAtEntry = (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index);
			const bool bothEndAtExit = (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index);
			if (intervalsOverlap || bothStartAtEntry || bothEndAtExit)
			{
				if (subrangeItr->range->physicalRegister >= 0)
					physRegisterMask &= ~(1 << (subrangeItr->range->physicalRegister));
			}
		}
	}
	return physRegisterMask;
}

bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs)
{
	return lhs->start.index < rhs->start.index;
}

// Re-links the segment's subrange list (link_segmentSubrangesGPR) so that it is ordered ascending by start.index.
void _sortSegmentAllSubrangesLinkedList(PPCRecImlSegment_t* imlSegment)
{
	// disassemble linked list
	// a growable buffer is used so that segments with a very large number of subranges cannot overflow a fixed-size stack array
	std::vector<raLivenessSubrange_t*> subrangeList;
	for (raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; subrangeItr; subrangeItr = subrangeItr->link_segmentSubrangesGPR.next)
		subrangeList.push_back(subrangeItr);
	if (subrangeList.empty())
	{
		imlSegment->raInfo.linkedList_allSubranges = nullptr;
		return;
	}
	// sort
	std::sort(subrangeList.begin(), subrangeList.end(), _livenessRangeStartCompare);
	// reassemble linked list
	const size_t count = subrangeList.size();
	imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
	for (size_t i = 0; i < count; i++)
	{
		subrangeList[i]->link_segmentSubrangesGPR.prev = (i > 0) ? subrangeList[i - 1] : nullptr;
		subrangeList[i]->link_segmentSubrangesGPR.next = (i + 1 < count) ? subrangeList[i + 1] : nullptr;
	}
	// validate list
#ifndef PUBLIC_RELEASE
	size_t count2 = 0;
	sint32 currentStartIndex = RA_INTER_RANGE_START;
	for (raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; subrangeItr; subrangeItr = subrangeItr->link_segmentSubrangesGPR.next)
	{
		count2++;
		if (subrangeItr->start.index < currentStartIndex)
			assert_dbg();
		currentStartIndex = subrangeItr->start.index;
	}
	if (count != count2)
		assert_dbg();
#endif
}

// Walks the segment's subranges in order of start index and assigns a physical register to every range
// that does not have one yet.
// Returns true if all subranges of the segment have a register.
// Returns false as soon as a conflict had to be resolved by splitting/exploding a range; in that case
// the range layout has changed and the caller is expected to run the assignment again.
bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	// sort subranges ascending by start index
	_sortSegmentAllSubrangesLinkedList(imlSegment);

	raLiveRangeInfo_t liveInfo;
	liveInfo.liveRangesCount = 0;
	raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
	while (subrangeItr)
	{
		sint32 currentIndex = subrangeItr->start.index;
		// validate subrange
		PPCRecRA_debugValidateSubrange(subrangeItr);
		// expire ranges
		for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
		{
			raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f];
			if (liverange->end.index <= currentIndex && liverange->end.index != RA_INTER_RANGE_END)
			{
#ifndef PUBLIC_RELEASE
				if (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken)
					assert_dbg(); // infinite subranges should not expire
#endif
				// remove entry (swap with last, then re-check the same slot)
				liveInfo.liveRangesCount--;
				liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount];
				f--;
			}
		}
		// check if subrange already has register assigned
		if (subrangeItr->range->physicalRegister >= 0)
		{
			// verify if register is actually available
#ifndef PUBLIC_RELEASE
			for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
			{
				raLivenessSubrange_t* liverangeItr = liveInfo.liveRangeList[f];
				if (liverangeItr->range->physicalRegister == subrangeItr->range->physicalRegister)
				{
					// this should never happen because we try to preventively avoid register conflicts
					assert_dbg();
				}
			}
#endif
			// add to live ranges
			liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
			liveInfo.liveRangesCount++;
			// next
			subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
			continue;
		}
		// find free register
		// NOTE(review): the statements from here to the end of the loop below were damaged in the available
		// copy of this file (text between '<' and '>' was lost) and have been reconstructed from the
		// remaining fragments - verify against the upstream source.
		uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1;
		for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
		{
			raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f];
			if (liverange->range->physicalRegister < 0)
				assert_dbg();
			physRegisterMask &= ~(1 << liverange->range->physicalRegister);
		}
		// check intersections with other ranges and determine allowed registers
		uint32 allowedPhysRegisterMask = 0;
		uint32 unusedRegisterMask = physRegisterMask; // mask of registers that are currently not used (does not include range checks)
		if (physRegisterMask != 0)
		{
			allowedPhysRegisterMask = PPCRecRA_getAllowedRegisterMaskForFullRange(subrangeItr->range);
			physRegisterMask &= allowedPhysRegisterMask;
		}
		if (physRegisterMask == 0)
		{
			struct
			{
				// estimated costs and chosen candidates for the different spill strategies
				// hole cutting into a local range
				struct
				{
					sint32 distance;
					raLivenessSubrange_t* largestHoleSubrange;
					sint32 cost; // additional cost of choosing this candidate
				}localRangeHoleCutting;
				// split current range (this is generally only a good choice when the current range is long but rarely used)
				struct
				{
					sint32 cost;
					sint32 physRegister;
					sint32 distance; // size of hole
				}availableRegisterHole;
				// explode a inter-segment range (prefer ranges that are not read/written in this segment)
				struct
				{
					raLivenessRange_t* range;
					sint32 cost;
					sint32 distance; // size of hole
					// note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
				}explodeRange;
				// todo - add more strategies, make cost estimation smarter (for example, in some cases splitting can have reduced or no cost if read/store can be avoided due to data flow)
			}spillStrategies;
			// cant assign register
			// there might be registers available, we just can't use them due to range conflicts
			if (subrangeItr->end.index != RA_INTER_RANGE_END)
			{
				// range ends in current segment

				// Current algo looks like this:
				// 1) Get the size of the largest possible hole that we can cut into any of the live local subranges
				// 1.1) Check if the hole is large enough to hold the current subrange
				// 2) If yes, cut hole and return false (full retry)
				// 3) If no, try to reuse free register (need to determine how large the region is we can use)
				// 4) If there is no free register or the range is extremely short go back to step 1+2 but additionally split the current subrange at where the hole ends

				cemu_assert_debug(currentIndex == subrangeItr->start.index);

				sint32 requiredSize = subrangeItr->end.index - subrangeItr->start.index;
				// evaluate strategy: Cut hole into local subrange
				// NOTE(review): the distance helpers return INT_MAX when there is no further use, in which case
				// 'currentIndex + distance' below can exceed the sint32 range - confirm this cannot happen in practice
				spillStrategies.localRangeHoleCutting.distance = -1;
				spillStrategies.localRangeHoleCutting.largestHoleSubrange = nullptr;
				spillStrategies.localRangeHoleCutting.cost = INT_MAX;
				if (currentIndex >= 0)
				{
					for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
					{
						raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f];
						if (candidate->end.index == RA_INTER_RANGE_END)
							continue;
						sint32 distance = PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex);
						if (distance < 2)
							continue; // not even worth the consideration
						// calculate split cost of candidate
						sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentIndex + distance);
						// calculate additional split cost of currentRange if hole is not large enough
						if (distance < requiredSize)
						{
							cost += PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance);
							// we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes)
							cost += (requiredSize - distance) / 10;
						}
						// compare cost with previous candidates
						if (cost < spillStrategies.localRangeHoleCutting.cost)
						{
							spillStrategies.localRangeHoleCutting.cost = cost;
							spillStrategies.localRangeHoleCutting.distance = distance;
							spillStrategies.localRangeHoleCutting.largestHoleSubrange = candidate;
						}
					}
				}
				// evaluate strategy: Split current range to fit in available holes
				spillStrategies.availableRegisterHole.cost = INT_MAX;
				spillStrategies.availableRegisterHole.distance = -1;
				spillStrategies.availableRegisterHole.physRegister = -1;
				if (currentIndex >= 0)
				{
					if (unusedRegisterMask != 0)
					{
						for (sint32 t = 0; t < PPC_X64_GPR_USABLE_REGISTERS; t++)
						{
							if ((unusedRegisterMask&(1 << t)) == 0)
								continue;
							// get size of potential hole for this register
							sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, t);
							if (distance < 2)
								continue; // not worth consideration
							// calculate additional cost due to split
							if (distance >= requiredSize)
								assert_dbg(); // should not happen or else we would have selected this register
							sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance);
							// add small additional cost for the remaining range (prefer larger holes)
							cost += (requiredSize - distance) / 10;
							if (cost < spillStrategies.availableRegisterHole.cost)
							{
								spillStrategies.availableRegisterHole.cost = cost;
								spillStrategies.availableRegisterHole.distance = distance;
								spillStrategies.availableRegisterHole.physRegister = t;
							}
						}
					}
				}
				// evaluate strategy: Explode inter-segment ranges
				spillStrategies.explodeRange.cost = INT_MAX;
				spillStrategies.explodeRange.range = nullptr;
				spillStrategies.explodeRange.distance = -1;
				for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
				{
					raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f];
					if (candidate->end.index != RA_INTER_RANGE_END)
						continue;
					sint32 distance = PPCRecRA_countInstructionsUntilNextUse(liveInfo.liveRangeList[f], currentIndex);
					if (distance < 2)
						continue;
					sint32 cost;
					cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range);
					// if the hole is not large enough, add cost of splitting current subrange
					if (distance < requiredSize)
					{
						cost += PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance);
						// add small additional cost for the remaining range (prefer larger holes)
						cost += (requiredSize - distance) / 10;
					}
					// compare with current best candidate for this strategy
					if (cost < spillStrategies.explodeRange.cost)
					{
						spillStrategies.explodeRange.cost = cost;
						spillStrategies.explodeRange.distance = distance;
						spillStrategies.explodeRange.range = candidate->range;
					}
				}
				// choose strategy (lowest cost wins; ties are resolved in the order explode > available register > hole cutting)
				if (spillStrategies.explodeRange.cost != INT_MAX && spillStrategies.explodeRange.cost <= spillStrategies.localRangeHoleCutting.cost && spillStrategies.explodeRange.cost <= spillStrategies.availableRegisterHole.cost)
				{
					// explode range
					PPCRecRA_explodeRange(ppcImlGenContext, spillStrategies.explodeRange.range);
					// split current subrange if necessary
					if (requiredSize > spillStrategies.explodeRange.distance)
						PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex + spillStrategies.explodeRange.distance, true);
				}
				else if (spillStrategies.availableRegisterHole.cost != INT_MAX && spillStrategies.availableRegisterHole.cost <= spillStrategies.explodeRange.cost && spillStrategies.availableRegisterHole.cost <= spillStrategies.localRangeHoleCutting.cost)
				{
					// use available register
					PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex + spillStrategies.availableRegisterHole.distance, true);
				}
				else if (spillStrategies.localRangeHoleCutting.cost != INT_MAX && spillStrategies.localRangeHoleCutting.cost <= spillStrategies.explodeRange.cost && spillStrategies.localRangeHoleCutting.cost <= spillStrategies.availableRegisterHole.cost)
				{
					// cut hole
					PPCRecRA_splitLocalSubrange(ppcImlGenContext, spillStrategies.localRangeHoleCutting.largestHoleSubrange, currentIndex + spillStrategies.localRangeHoleCutting.distance, true);
					// split current subrange if necessary
					if (requiredSize > spillStrategies.localRangeHoleCutting.distance)
						PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex + spillStrategies.localRangeHoleCutting.distance, true);
				}
				else if (subrangeItr->start.index == RA_INTER_RANGE_START)
				{
					// alternative strategy if we have no other choice: explode current range
					PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr->range);
				}
				else
					assert_dbg();

				return false;
			}
			else
			{
				// range exceeds segment border
				// simple but bad solution -> explode the entire range (no longer allow it to cross segment boundaries)
				// better solutions: 1) Depending on the situation, we can explode other ranges to resolve the conflict. Thus we should explode the range with the lowest extra cost
				//					 2) Or we explode the range only partially
				// explode the range with the least cost
				spillStrategies.explodeRange.cost = INT_MAX;
				spillStrategies.explodeRange.range = nullptr;
				spillStrategies.explodeRange.distance = -1;
				for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
				{
					raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f];
					if (candidate->end.index != RA_INTER_RANGE_END)
						continue;
					// only select candidates that clash with current subrange
					if (candidate->range->physicalRegister < 0 && candidate != subrangeItr)
						continue;

					sint32 cost;
					cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range);
					// compare with current best candidate for this strategy
					if (cost < spillStrategies.explodeRange.cost)
					{
						spillStrategies.explodeRange.cost = cost;
						spillStrategies.explodeRange.distance = INT_MAX;
						spillStrategies.explodeRange.range = candidate->range;
					}
				}
				// add current range as a candidate too
				sint32 ownCost;
				ownCost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(subrangeItr->range);
				if (ownCost < spillStrategies.explodeRange.cost)
				{
					spillStrategies.explodeRange.cost = ownCost;
					spillStrategies.explodeRange.distance = INT_MAX;
					spillStrategies.explodeRange.range = subrangeItr->range;
				}
				if (spillStrategies.explodeRange.cost == INT_MAX)
					assert_dbg(); // should not happen
				PPCRecRA_explodeRange(ppcImlGenContext, spillStrategies.explodeRange.range);
			}
			return false;
		}
		// assign register to range (lowest-numbered register that is still allowed)
		sint32 registerIndex = -1;
		for (sint32 f = 0; f < PPC_X64_GPR_USABLE_REGISTERS; f++)
		{
			if ((physRegisterMask&(1 << f)) != 0)
			{
				registerIndex = f;
				break;
			}
		}
		subrangeItr->range->physicalRegister = registerIndex;
		// add to live ranges
		liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
		liveInfo.liveRangesCount++;
		// next
		subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
	}
	return true;
}

// Assigns registers for all segments, visiting segments with the highest loopDepth first.
// Whenever a segment reports that ranges were modified (assignment returned false) the whole pass is restarted.
void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext)
{
	// start with frequently executed segments first
	sint32 maxLoopDepth = 0;
	for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
	{
		maxLoopDepth = std::max(maxLoopDepth, ppcImlGenContext->segmentList[i]->loopDepth);
	}
	while (true)
	{
		// a pass in which no segment needed processing counts as finished
		// (starting with 'false' here would spin forever when the segment list is empty)
		bool done = true;
		for (sint32 d = maxLoopDepth; d >= 0; d--)
		{
			for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
			{
				PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i];
				if (imlSegment->loopDepth != d)
					continue;
				done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, imlSegment);
				if (done == false)
					break;
			}
			if (done == false)
				break;
		}
		if (done)
			break;
	}
}

// result of _findSubrangeWriteEndings
typedef struct
{
	raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE];
	sint32 subrangeCount;
	bool hasUndefinedEndings; // set if the traversal could not determine all endings
}subrangeEndingInfo_t;

// Follows subrange along subrangeBranchNotTaken/subrangeBranchTaken and collects every reachable
// subrange that ends inside its segment (end.index != RA_INTER_RANGE_END) into info->subrangeList.
// info->hasUndefinedEndings is set if the recursion depth reaches 30, the list is full, or a segment
// has a successor for which the subrange has no continuation.
void _findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
{
	if (depth >= 30)
	{
		info->hasUndefinedEndings = true;
		return;
	}
	if (subrange->lastIterationIndex == iterationIndex)
		return; // already processed
	subrange->lastIterationIndex = iterationIndex;
	if (subrange->hasStoreDelayed)
		return; // no need to traverse this subrange
	PPCRecImlSegment_t* imlSegment = subrange->imlSegment;
	if (subrange->end.index != RA_INTER_RANGE_END)
	{
		// ending segment
		if (info->subrangeCount >= SUBRANGE_LIST_SIZE)
		{
			info->hasUndefinedEndings = true;
			return;
		}
		info->subrangeList[info->subrangeCount] = subrange;
		info->subrangeCount++;
		return;
	}

	// traverse next subranges in flow
	if (imlSegment->nextSegmentBranchNotTaken)
	{
		if (subrange->subrangeBranchNotTaken == nullptr)
		{
			info->hasUndefinedEndings = true;
		}
		else
		{
			_findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info);
		}
	}
	if (imlSegment->nextSegmentBranchTaken)
	{
		if (subrange->subrangeBranchTaken == nullptr)
		{
			info->hasUndefinedEndings = true;
		}
		else
		{
			_findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info);
		}
	}
}

// For a subrange that leaves its segment and has a store: decides whether the store can be moved
// ("delayed") into the subranges where the value's range finally ends. If so, hasStore is cleared and
// hasStoreDelayed is set on this subrange, and hasStore is set on all collected endings.
void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange)
{
	if (subrange->end.index != RA_INTER_RANGE_END)
		return;
	// analyze data flow across segments (if this segment has writes)
	if (subrange->hasStore)
	{
		subrangeEndingInfo_t writeEndingInfo;
		writeEndingInfo.subrangeCount = 0;
		writeEndingInfo.hasUndefinedEndings = false;
		_findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo);
		if (writeEndingInfo.hasUndefinedEndings == false)
		{
			// get cost of delaying store into endings (the most expensive ending that does not store yet)
			sint32 delayStoreCost = 0;
			bool alreadyStoredInAllEndings = true;
			for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
			{
				raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i];
				if (subrangeItr->hasStore)
					continue; // this ending already stores, no extra cost
				alreadyStoredInAllEndings = false;
				sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment);
				delayStoreCost = std::max(storeCost, delayStoreCost);
			}
			if (alreadyStoredInAllEndings)
			{
				subrange->hasStore = false;
				subrange->hasStoreDelayed = true;
			}
			else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment))
			{
				subrange->hasStore = false;
				subrange->hasStoreDelayed = true;
				for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
				{
					raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i];
					subrangeItr->hasStore = true;
				}
			}
		}
	}
}

// Rewrites one segment after register assignment: inserts name->register loads where subranges begin,
// register->name stores where subranges with hasStore end, and replaces the virtual registers used by
// each instruction with the assigned physical registers.
// The statement order in this function is significant ('index' is advanced for every inserted instruction).
void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
	// translation table virtual register -> physical register (-1 = currently not mapped)
	sint16 virtualReg2PhysReg[PPC_REC_MAX_VIRTUAL_GPR];
	for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
		virtualReg2PhysReg[i] = -1;

	raLiveRangeInfo_t liveInfo;
	liveInfo.liveRangesCount = 0;
	sint32 index = 0;
	// if the segment ends with a suffix instruction, loads/stores must be inserted in front of it
	sint32 suffixInstructionCount = (imlSegment->imlListCount > 0 && PPCRecompiler_isSuffixInstruction(imlSegment->imlList + imlSegment->imlListCount - 1)) ? 1 : 0;
	// load register ranges that are supplied from previous segments
	raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
	while (subrangeItr)
	{
		if (subrangeItr->start.index == RA_INTER_RANGE_START)
		{
			liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
			liveInfo.liveRangesCount++;
#ifndef PUBLIC_RELEASE
			// load GPR
			if (subrangeItr->_noLoad == false)
			{
				assert_dbg();
			}
			// update translation table
			if (virtualReg2PhysReg[subrangeItr->range->virtualRegister] != -1)
				assert_dbg();
#endif
			virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
		}
		// next
		subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
	}
	// process instructions
	while (index < imlSegment->imlListCount + 1)
	{
		// expire ranges
		for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
		{
			raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f];
			if (liverange->end.index <= index)
			{
				// update translation table
				if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1)
					assert_dbg();
				virtualReg2PhysReg[liverange->range->virtualRegister] = -1;
				// store GPR
				if (liverange->hasStore)
				{
					PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlListCount - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name);
					index++;
				}
				// remove entry
				liveInfo.liveRangesCount--;
				liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount];
				f--;
			}
		}
		// load new ranges
		subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
		while (subrangeItr)
		{
			if (subrangeItr->start.index == index)
			{
				liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
				liveInfo.liveRangesCount++;
				// load GPR
				if (subrangeItr->_noLoad == false)
				{
					PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min(index, imlSegment->imlListCount - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name);
					index++;
					subrangeItr->start.index--;
				}
				// update translation table
				cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1);
				virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
			}
			subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
		}
		// replace registers
		if (index < imlSegment->imlListCount)
		{
			PPCImlOptimizerUsedRegisters_t gprTracking;
			PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);

			sint32 inputGpr[4];
			inputGpr[0] = gprTracking.gpr[0];
			inputGpr[1] = gprTracking.gpr[1];
			inputGpr[2] = gprTracking.gpr[2];
			inputGpr[3] = gprTracking.gpr[3];
			sint32 replaceGpr[4];
			for (sint32 f = 0; f < 4; f++)
			{
				sint32 virtualRegister = gprTracking.gpr[f];
				if (virtualRegister < 0)
				{
					replaceGpr[f] = -1;
					continue;
				}
				if (virtualRegister >= PPC_REC_MAX_VIRTUAL_GPR)
					assert_dbg();
				replaceGpr[f] = virtualReg2PhysReg[virtualRegister];
				cemu_assert_debug(replaceGpr[f] >= 0);
			}
			PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList + index, inputGpr, replaceGpr);
		}
		// next iml instruction
		index++;
	}
	// expire infinite subranges (subranges that cross the segment border)
	sint32 storeLoadListLength = 0;
	raLoadStoreInfo_t loadStoreList[PPC_REC_MAX_VIRTUAL_GPR];
	for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
	{
		raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f];
		if (liverange->end.index == RA_INTER_RANGE_END)
		{
			// update translation table
			cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1);
			virtualReg2PhysReg[liverange->range->virtualRegister] = -1;
			// store GPR
			if (liverange->hasStore)
			{
				loadStoreList[storeLoadListLength].registerIndex = liverange->range->physicalRegister;
				loadStoreList[storeLoadListLength].registerName = liverange->range->name;
				storeLoadListLength++;
			}
			// remove entry
			liveInfo.liveRangesCount--;
			liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount];
			f--;
		}
		else
		{
			cemu_assert_suspicious();
		}
	}
	if (storeLoadListLength > 0)
	{
		PPCRecRA_insertGPRStoreInstructions(imlSegment, imlSegment->imlListCount - suffixInstructionCount, loadStoreList, storeLoadListLength);
	}
	// load subranges for next segments
	subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
	storeLoadListLength = 0;
	while (subrangeItr)
	{
		if (subrangeItr->start.index == RA_INTER_RANGE_END)
		{
			liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
			liveInfo.liveRangesCount++;
			// load GPR
			if (subrangeItr->_noLoad == false)
			{
				loadStoreList[storeLoadListLength].registerIndex = subrangeItr->range->physicalRegister;
				loadStoreList[storeLoadListLength].registerName = subrangeItr->range->name;
				storeLoadListLength++;
			}
			// update translation table
			cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1);
			virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister;
		}
		// next
		subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
	}
	if (storeLoadListLength > 0)
	{
		PPCRecRA_insertGPRLoadInstructions(imlSegment, imlSegment->imlListCount - suffixInstructionCount, loadStoreList, storeLoadListLength);
	}
}

// Runs PPCRecRA_generateSegmentInstructions for every segment of the function.
void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext)
{
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
		PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment);
	}
}

void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext);

// Prepares the segment graph for register allocation:
// 1) inserts an empty segment on not-taken edges whose target has more than one predecessor
// 2) assigns momentaryIndex to all segments and updates loopDepth via PPCRecRA_identifyLoop
void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext)
{
	// insert empty segments after every non-taken branch if the linked segment has more than one input
	// this gives the register allocator more room to create efficient spill code
	sint32 segmentIndex = 0;
	while (segmentIndex < ppcImlGenContext->segmentListCount)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex];
		if (imlSegment->nextSegmentIsUncertain)
		{
			segmentIndex++;
			continue;
		}
		// only segments with two successors are of interest
		if (imlSegment->nextSegmentBranchTaken == nullptr || imlSegment->nextSegmentBranchNotTaken == nullptr)
		{
			segmentIndex++;
			continue;
		}
		if (imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() <= 1)
		{
			segmentIndex++;
			continue;
		}
		if (imlSegment->nextSegmentBranchNotTaken->isEnterable)
		{
			segmentIndex++;
			continue;
		}
		// splice the new segment P1 between P0 (current segment) and its not-taken successor
		PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1);
		PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList[segmentIndex + 0];
		PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList[segmentIndex + 1];
		PPCRecImlSegment_t* nextSegment = imlSegment->nextSegmentBranchNotTaken;
		PPCRecompilerIML_removeLink(imlSegmentP0, nextSegment);
		PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP1, nextSegment);
		PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
		segmentIndex++;
	}
	// detect loops
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
		imlSegment->momentaryIndex = s;
	}
	for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
	{
		PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
		PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment);
	}
}

// Entry point of the register allocator, runs all stages in order.
void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext)
{
	PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext);

	// reset the range list to a freshly constructed empty container
	// (decltype is used so that the container type does not have to be repeated here)
	ppcImlGenContext->raInfo.list_ranges = decltype(ppcImlGenContext->raInfo.list_ranges)();

	// calculate liveness
	PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext);
	PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext);

	PPCRecRA_assignRegisters(ppcImlGenContext);

	PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext);
	PPCRecRA_generateMoveInstructions(ppcImlGenContext);

	PPCRecRA_deleteAllRanges(ppcImlGenContext);
}