From e1e710e3f5cbf7808f5c564307d1949733bc0d2c Mon Sep 17 00:00:00 2001
From: Exzap <13877693+Exzap@users.noreply.github.com>
Date: Mon, 12 Dec 2022 08:50:29 +0100
Subject: [PATCH] PPCRec: Reworked IML builder to work with basic blocks

Intermediate commit while I'm still fixing things, but I didn't want to pile on too many changes in a single commit.

New: Reworked the PPC->IML converter to first create a graph of basic blocks and then turn those into IML segment(s). This was mainly done to decouple the IML design from PPC-specific knowledge like branch target addresses. The previous design also didn't allow cycle counting to be preserved properly in all cases, since it was based on IML instruction counting.
The new solution supports functions with a non-contiguous body. A pretty common example of this is a function that ends with a trailing B instruction to some other place.

Current limitations:
- BL inlining not implemented
- MFTB not implemented
- BCCTR and BCLR are only partially implemented

Undo vcpkg change
---
 src/Cafe/HW/Espresso/EspressoISA.h            |   20 +-
 .../Recompiler/BackendX64/BackendX64.cpp      |  135 +-
 .../Recompiler/BackendX64/BackendX64.h        |   19 +-
 .../Recompiler/BackendX64/BackendX64Gen.cpp   |    5 -
 .../Recompiler/BackendX64/X64Emit.hpp         |    1 -
 src/Cafe/HW/Espresso/Recompiler/IML/IML.h     |    2 +-
 .../HW/Espresso/Recompiler/IML/IMLDebug.cpp   |   62 +-
 .../Espresso/Recompiler/IML/IMLInstruction.h  |    8 +-
 .../Recompiler/IML/IMLRegisterAllocator.cpp   |  107 +-
 .../HW/Espresso/Recompiler/IML/IMLSegment.cpp |   29 +
 .../HW/Espresso/Recompiler/IML/IMLSegment.h   |   28 +-
 .../Recompiler/PPCFunctionBoundaryTracker.h   |   45 +-
 .../HW/Espresso/Recompiler/PPCRecompiler.cpp  |   21 +-
 .../HW/Espresso/Recompiler/PPCRecompiler.h    |   28 +
 .../HW/Espresso/Recompiler/PPCRecompilerIml.h |    7 +-
 .../Recompiler/PPCRecompilerImlGen.cpp        | 1618 ++++++++++++-----
 .../Recompiler/PPCRecompilerIntermediate.cpp  |  106 +-
 src/Cemu/Logging/CemuLogging.h                |    2 +-
 18 files changed, 1554 insertions(+), 689 deletions(-)

diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h
index b3ae45c3..e66e1424 100644
--- a/src/Cafe/HW/Espresso/EspressoISA.h
+++ b/src/Cafe/HW/Espresso/EspressoISA.h
@@ -91,13 +91,15 @@ namespace Espresso
 		BCCTR = 528
 	};
 
-	enum class OPCODE_31
+	enum class Opcode31
 	{
-
+		TW = 4,
+		MFTB = 371,
 	};
 
 	inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); };
 	inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); };
+	inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); };
 
 	struct BOField
@@ -132,6 +134,12 @@ namespace Espresso
 		uint8 bo;
 	};
 
+	// returns true if LK bit is set, only valid for branch instructions
+	inline bool DecodeLK(uint32 opcode)
+	{
+		return (opcode & 1) != 0;
+	}
+
 	inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK)
 	{
 		LI = opcode & 0x3fffffc;
@@ -183,13 +191,7 @@ namespace Espresso
 		_decodeForm_D_branch(opcode, BD, BO, BI, AA, LK);
 	}
 
-	inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
-	{
-		// form XL (with BD field expected to be zero)
-		_decodeForm_XL(opcode, BO, BI, LK);
-	}
-
-	inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK)
+	inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCCTR
 	{
 		// form XL (with BD field expected to be zero)
 		_decodeForm_XL(opcode, BO, BI, LK);
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index b69d70a0..5080421e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -24,15 +24,7 @@ sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping */ void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr) { - if( x64GenContext->relocateOffsetTableCount >= x64GenContext->relocateOffsetTableSize ) - { - x64GenContext->relocateOffsetTableSize = std::max(4, x64GenContext->relocateOffsetTableSize*2); - x64GenContext->relocateOffsetTable = (x64RelocEntry_t*)realloc(x64GenContext->relocateOffsetTable, sizeof(x64RelocEntry_t)*x64GenContext->relocateOffsetTableSize); - } - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].offset = x64GenContext->codeBufferIndex; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].type = type; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].extraInfo = extraInfo; - x64GenContext->relocateOffsetTableCount++; + x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, type, extraInfo); } /* @@ -306,6 +298,9 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) { + // according to MS ABI the caller needs to save: + // RAX, RCX, RDX, R8, R9, R10, R11 + uint32 ppcAddress = imlInstruction->op_macro.param; uint32 sprId = imlInstruction->op_macro.param2&0xFFFF; uint32 gprIndex = (imlInstruction->op_macro.param2>>16)&0x1F; @@ -321,7 +316,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, // reserve space on stack for call parameters x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11 + 8); x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0); - // call HLE function + // call function if( sprId == SPR_TBL ) x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBL); else if( sprId == SPR_TBU ) @@ -1971,6 +1966,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { + if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) + { + debug_printf("PPCRecompilerX64Gen_imlInstruction_conditionalJump(): Failed on deprecated jump method\n"); + return false; + } + if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) { // jump always @@ -1985,19 +1986,25 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec else { // deprecated (jump to jumpmark) + __debugbreak(); // deprecated PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); x64Gen_jmp_imm32(x64GenContext, 0); } } else { - if (imlInstruction->op_conditionalJump.jumpAccordingToSegment) - assert_dbg(); + if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) + { + debug_printf("Unsupported deprecated cjump to ppc address\n"); + return false; + } + cemu_assert_debug(imlSegment->nextSegmentBranchTaken); + // generate jump update marker if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || 
imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) { // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); sint32 condition = imlInstruction->op_conditionalJump.condition; if( condition == PPCREC_JUMP_CONDITION_E ) x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); @@ -2015,19 +2022,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); return true; } @@ -2036,19 +2043,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } @@ -2057,26 +2064,28 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } } + cemu_assert_debug(false); // should not reach? 
} x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + cemu_assert_debug(imlSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, (void*)imlSegment->GetBranchTaken()); if( imlInstruction->op_conditionalJump.bitMustBeSet ) { x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); @@ -2094,13 +2103,14 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // some tests (all performed on a i7-4790K) - // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write) + // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency) // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC // BT x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); + cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, x64GenContext->currentSegment->GetBranchTaken()); + x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); return true; } @@ -2152,22 +2162,6 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { imlInstruction->op_ppcEnter.x64Offset = x64GenContext->codeBufferIndex; - // generate code - if( ppcImlGenContext->hasFPUInstruction ) - { - // old FPU unavailable code - //PPCRecompilerX86_crConditionFlags_saveBeforeOverwrite(PPCRecFunction, ppcImlGenContext, x64GenContext); - //// skip if FP bit in MSR is set - //// #define MSR_FP (1<<13) - //x64Gen_bt_mem8(x64GenContext, REG_ESP, offsetof(PPCInterpreter_t, msr), 13); - //uint32 jmpCodeOffset = x64GenContext->codeBufferIndex; - //x64Gen_jmpc(x64GenContext, X86_CONDITION_CARRY, 0); - //x64Gen_mov_reg32_imm32(x64GenContext, REG_EAX, imlInstruction->op_ppcEnter.ppcAddress&0x7FFFFFFF); - //PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X86_RELOC_MAKE_RELATIVE); - //x64Gen_jmp_imm32(x64GenContext, (uint32)PPCRecompiler_recompilerCallEscapeAndCallFPUUnavailable); - //// patch jump - //*(uint32*)(x64GenContext->codeBuffer+jmpCodeOffset+2) = x64GenContext->codeBufferIndex-jmpCodeOffset-6; - } } void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) @@ -2193,7 +2187,6 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, } else assert_dbg(); - //x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr)+sizeof(uint32)*(name-PPCREC_NAME_SPR0)); } else assert_dbg(); @@ -2256,7 +2249,7 @@ uint8* 
PPCRecompilerX86_allocateExecutableMemory(sint32 size) bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 1024; x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; @@ -2266,6 +2259,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo bool codeGenerationFailed = false; for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { + x64GenContext.currentSegment = segIt; segIt->x64Offset = x64GenContext.codeBufferIndex; for(size_t i=0; iimlList.size(); i++) { @@ -2442,48 +2436,43 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo if( codeGenerationFailed ) { free(x64GenContext.codeBuffer); - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); return false; } // allocate executable memory uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); size_t baseAddress = (size_t)executableMemory; // fix relocs - for(sint32 i=0; isegmentList2) - { - if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) - { - x64Offset = segIt->x64Offset; - break; - } - } - if (x64Offset == 0xFFFFFFFF) - { - debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); - // todo: Cleanup - return false; - } + cemu_assert_suspicious(); + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) + // { + // x64Offset = segIt->x64Offset; + // break; + // } + //} + //if (x64Offset == 0xFFFFFFFF) + //{ + // debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); + // // todo: Cleanup + // return false; + //} } else { - IMLSegment* destSegment = (IMLSegment*)x64GenContext.relocateOffsetTable[i].extraInfo; + IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; x64Offset = destSegment->x64Offset; } - uint32 relocBase = x64GenContext.relocateOffsetTable[i].offset; + uint32 relocBase = relocIt.offset; uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) { @@ -2525,8 +2514,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); free(x64GenContext.codeBuffer); x64GenContext.codeBuffer = nullptr; - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); // set code PPCRecFunction->x86Code = executableMemory; PPCRecFunction->x86Size = x64GenContext.codeBufferIndex; @@ -2535,7 +2522,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo void PPCRecompilerX64Gen_generateEnterRecompilerCode() { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 1024; x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; @@ -2615,7 +2602,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 128; x64GenContext.codeBuffer = 
(uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 60cc1e2a..0548f402 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -3,6 +3,8 @@ struct x64RelocEntry_t { + x64RelocEntry_t(uint32 offset, uint8 type, void* extraInfo) : offset(offset), type(type), extraInfo(extraInfo) {}; + uint32 offset; uint8 type; void* extraInfo; @@ -10,16 +12,16 @@ struct x64RelocEntry_t struct x64GenContext_t { - uint8* codeBuffer; - sint32 codeBufferIndex; - sint32 codeBufferSize; + IMLSegment* currentSegment{}; + + uint8* codeBuffer{}; + sint32 codeBufferIndex{}; + sint32 codeBufferSize{}; // cr state - sint32 activeCRRegister; // current x86 condition flags reflect this cr* register - sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned) + sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register + sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned) // relocate offsets - x64RelocEntry_t* relocateOffsetTable; - sint32 relocateOffsetTableSize; - sint32 relocateOffsetTableCount; + std::vector relocateOffsetTable2; }; // Some of these are defined by winnt.h and gnu headers @@ -126,7 +128,6 @@ enum #define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) #define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) -#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction #define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset #define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 3abecb75..33ff52ac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -5,11 +5,6 @@ // mulx, rorx, sarx, shlx, shrx // PDEP, PEXT -void x64Gen_checkBuffer(x64GenContext_t* x64GenContext) -{ - // todo -} - void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v) { if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp index e936f1d8..b4021931 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp @@ -203,7 +203,6 @@ template void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB) { static_assert(TA::getType() == MODRM_OPR_TYPE::REG); - x64Gen_checkBuffer(x64GenContext); // REX prefix // 0100 WRXB if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 06f39815..72a2d3f5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -28,5 +28,5 @@ void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext); // debug -void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool 
printLivenessRangeInfo = false); +void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 69d8e1b7..560f5de1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -104,31 +104,48 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml } } -void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) +std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg) +{ + if (!ctx) + { + return ""; + } + // find segment index + for (size_t i = 0; i < ctx->segmentList2.size(); i++) + { + if (ctx->segmentList2[i] == seg) + { + return fmt::format("Seg{:04x}", i); + } + } + return ""; +} + +void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); - strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, imlSegment->ppcAddrMin, imlSegment->ppcAddrMax, imlSegment->loopDepth); + strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); if (imlSegment->isEnterable) { strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); } - else if (imlSegment->isJumpDestination) - { - strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); - } + //else if (imlSegment->isJumpDestination) + //{ + // strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); + //} debug_printf("%s\n", strOutput.c_str()); - strOutput.reset(); - strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); - debug_printf("%s", strOutput.c_str()); + //strOutput.reset(); + //strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); + //debug_printf("%s", strOutput.c_str()); if (printLivenessRangeInfo) { IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); } - debug_printf("\n"); + //debug_printf("\n"); sint32 lineOffsetParameters = 18; @@ -376,22 +393,22 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) { - strOutput.addFmt("{:-6} ", 
IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress); + strOutput.addFmt("CYCLE_CHECK\n"); } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { @@ -451,14 +468,15 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin { if (i) debug_printf(", "); - debug_printf("%p", (void*)imlSegment->list_prevSegments[i]); + debug_printf("%s", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str()); } debug_printf("\n"); - debug_printf("Links to: "); if (imlSegment->nextSegmentBranchNotTaken) - debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken); + debug_printf("BranchNotTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str()); if (imlSegment->nextSegmentBranchTaken) - debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken); + debug_printf("BranchTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str()); + if (imlSegment->nextSegmentIsUncertain) + debug_printf("Dynamic target\n"); debug_printf("\n"); } @@ -466,7 +484,7 @@ void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext) { for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { - IMLDebug_DumpSegment(ppcImlGenContext->segmentList2[i], i); + IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], false); debug_printf("\n"); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 18cf580d..c86bb610 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -152,7 +152,7 @@ enum PPCREC_IML_TYPE_R_S32, // r* (op) imm PPCREC_IML_TYPE_MACRO, PPCREC_IML_TYPE_CJUMP, // conditional jump - PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0 + PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) // conditional @@ -420,6 +420,11 @@ struct IMLInstruction op_jumpmark.address = address; } + void make_debugbreak(uint32 currentPPCAddress = 0) + { + make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0); + } + void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) { type = PPCREC_IML_TYPE_MACRO; @@ -431,6 +436,7 @@ struct IMLInstruction void make_ppcEnter(uint32 ppcAddress) { + cemu_assert_suspicious(); // removed type = PPCREC_IML_TYPE_PPC_ENTER; operation = 0; op_ppcEnter.ppcAddress = ppcAddress; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 08d776e7..b90aa9b1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -74,44 +74,44 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* 
ppcImlGenContext, IMLSegment* iml } } -typedef struct -{ - sint32 name; - sint32 virtualRegister; - sint32 physicalRegister; - bool isDirty; -}raRegisterState_t; +//typedef struct +//{ +// sint32 name; +// sint32 virtualRegister; +// sint32 physicalRegister; +// bool isDirty; +//}raRegisterState_t; -const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; - -raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].virtualRegister == virtualRegister) - { -#ifdef CEMU_DEBUG_ASSERT - if (regState[i].physicalRegister < 0) - assert_dbg(); -#endif - return regState + i; - } - } - return nullptr; -} - -raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].physicalRegister < 0) - { - regState[i].physicalRegister = i; - return regState + i; - } - } - return nullptr; -} +//const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; +// +//raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) +//{ +// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) +// { +// if (regState[i].virtualRegister == virtualRegister) +// { +//#ifdef CEMU_DEBUG_ASSERT +// if (regState[i].physicalRegister < 0) +// assert_dbg(); +//#endif +// return regState + i; +// } +// } +// return nullptr; +//} +// +//raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) +//{ +// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) +// { +// if (regState[i].physicalRegister < 0) +// { +// regState[i].physicalRegister = i; +// return regState + i; +// } +// } +// return nullptr; +//} typedef struct { @@ -309,18 +309,32 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) #endif } +void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +{ + // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) + // algorithm goes as follows: + // 1) Iterate all instructions from beginning to end and keep a list of covering ranges + // 2) If we encounter an instruction with a fixed-register we: + // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction + // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. RA will then schedule register allocation around that and avoid moves + // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction + // Q: What if a specific fixed-register is used both for input and output and thus is destructive? A: Create temporary range + // Q: What if we have 3 different inputs that are all the same virtual register? A: Create temporary range + // Q: Assuming the above is implemented, do we even support overlapping two ranges of separate virtual regs on the same phys register? 
In theory the RA shouldn't care + // assume imlSegment->raInfo.linkedList_allSubranges is ordered ascending by start index already + + // todo +} + bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - // sort subranges ascending by start index - - //std::sort(imlSegment->raInfo.list_subranges.begin(), imlSegment->raInfo.list_subranges.end(), _sortSubrangesByStartIndexDepr); _sortSegmentAllSubrangesLinkedList(imlSegment); + PPCRecRA_HandleFixedRegisters(ppcImlGenContext, imlSegment); + raLiveRangeInfo_t liveInfo; liveInfo.liveRangesCount = 0; - //sint32 subrangeIndex = 0; - //for (auto& subrange : imlSegment->raInfo.list_subranges) raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { @@ -365,7 +379,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; continue; } - // find free register + // find free register for this segment uint32 physRegisterMask = (1<range); physRegisterMask &= allowedPhysRegisterMask; } @@ -761,7 +776,6 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - //for (auto& subrange : imlSegment->raInfo.list_subranges) while(subrangeItr) { if (subrangeItr->start.index == RA_INTER_RANGE_START) @@ -933,7 +947,7 @@ void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) +void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) { // insert empty segments after every non-taken branch if the linked segment has more than one input // this gives the register allocator more room to create efficient spill code @@ -985,7 +999,7 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext) { - PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext); + PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext); ppcImlGenContext->raInfo.list_ranges = std::vector(); @@ -1243,7 +1257,6 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe if (remainingScanDist <= 0) return; // can't reach end - // also dont forget: Extending is easier if we allow 'non symmetric' branches. E.g. 
register range one enters one branch IMLSegment* route[64]; route[0] = currentSegment; if (currentSegment->nextSegmentBranchNotTaken) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp index 4882a0a1..2b2c56a2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ -1,6 +1,13 @@ #include "IMLInstruction.h" #include "IMLSegment.h" +void IMLSegment::SetEnterable(uint32 enterAddress) +{ + cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress); + isEnterable = true; + enterPPCAddress = enterAddress; +} + bool IMLSegment::HasSuffixInstruction() const { if (imlList.empty()) @@ -16,8 +23,30 @@ IMLInstruction* IMLSegment::GetLastInstruction() return &imlList.back(); } +void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchNotTaken) + nextSegmentBranchNotTaken->list_prevSegments.erase(std::find(nextSegmentBranchNotTaken->list_prevSegments.begin(), nextSegmentBranchNotTaken->list_prevSegments.end(), this)); + nextSegmentBranchNotTaken = imlSegmentDst; + if(imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} +void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchTaken) + nextSegmentBranchTaken->list_prevSegments.erase(std::find(nextSegmentBranchTaken->list_prevSegments.begin(), nextSegmentBranchTaken->list_prevSegments.end(), this)); + nextSegmentBranchTaken = imlSegmentDst; + if (imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} +IMLInstruction* IMLSegment::AppendInstruction() +{ + IMLInstruction& inst = imlList.emplace_back(); + memset(&inst, 0, sizeof(IMLInstruction)); + return &inst; +} void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 1e27d303..7ea7903b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -103,8 +103,8 @@ struct IMLSegment bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true // jump destination segments - bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps - uint32 jumpDestinationPPCAddress{}; + //bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps + //uint32 jumpDestinationPPCAddress{}; // PPC FPR use mask bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR // CR use mask @@ -115,10 +115,30 @@ struct IMLSegment PPCSegmentRegisterAllocatorInfo_t raInfo{}; PPCRecVGPRDistances_t raDistances{}; bool raRangeExtendProcessed{}; - // segment points - IMLSegmentPoint* segmentPointList{}; + + // segment state API + void SetEnterable(uint32 enterAddress); + void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst); + void SetLinkBranchTaken(IMLSegment* imlSegmentDst); + + IMLSegment* GetBranchTaken() + { + return nextSegmentBranchTaken; + } + + IMLSegment* GetBranchNotTaken() + { + return nextSegmentBranchNotTaken; + } + + // instruction API + IMLInstruction* AppendInstruction(); + bool HasSuffixInstruction() const; IMLInstruction* GetLastInstruction(); + + // segment points + IMLSegmentPoint* segmentPointList{}; }; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h 
b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h index e558292b..96b5143e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h @@ -21,6 +21,16 @@ public: }; public: + ~PPCFunctionBoundaryTracker() + { + while (!map_ranges.empty()) + { + PPCRange_t* range = *map_ranges.begin(); + delete range; + map_ranges.erase(map_ranges.begin()); + } + } + void trackStartPoint(MPTR startAddress) { processRange(startAddress, nullptr, nullptr); @@ -40,10 +50,34 @@ public: return false; } + std::vector GetRanges() + { + std::vector r; + for (auto& it : map_ranges) + r.emplace_back(*it); + return r; + } + + bool ContainsAddress(uint32 addr) const + { + for (auto& it : map_ranges) + { + if (addr >= it->startAddress && addr < it->getEndAddress()) + return true; + } + return false; + } + + const std::set& GetBranchTargets() const + { + return map_branchTargetsAll; + } + private: void addBranchDestination(PPCRange_t* sourceRange, MPTR address) { - map_branchTargets.emplace(address); + map_queuedBranchTargets.emplace(address); + map_branchTargetsAll.emplace(address); } // process flow of instruction @@ -114,7 +148,7 @@ private: Espresso::BOField BO; uint32 BI; bool LK; - Espresso::decodeOp_BCLR(opcode, BO, BI, LK); + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); if (BO.branchAlways() && !LK) { // unconditional BLR @@ -218,7 +252,7 @@ private: auto rangeItr = map_ranges.begin(); PPCRange_t* previousRange = nullptr; - for (std::set::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); ) + for (std::set::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); ) { while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr)) { @@ -239,7 +273,7 @@ private: (*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length)) { // delete visited targets - targetItr = map_branchTargets.erase(targetItr); + targetItr = map_queuedBranchTargets.erase(targetItr); continue; } @@ -289,5 +323,6 @@ private: }; std::set map_ranges; - std::set map_branchTargets; + std::set map_queuedBranchTargets; + std::set map_branchTargetsAll; }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 9ff113b1..8ec2f545 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -131,7 +131,7 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress) } bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext); -PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut) +PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut, PPCFunctionBoundaryTracker& boundaryTracker) { if (range.startAddress >= PPC_REC_CODE_AREA_END) { @@ -156,10 +156,10 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t(); ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; - + // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; - bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses); 
+ bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker); if (compiledSuccessfully == false) { delete ppcRecFunc; @@ -173,6 +173,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + //if (ppcRecFunc->ppcAddress == 0x12345678) + //{ + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + + // Large functions for testing (botw): + // 3B4049C + // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); if (x64GenerationSuccess == false) @@ -181,6 +191,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP } // collect list of PPC-->x64 entry points + cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size()); + cemu_assert_debug(ppcImlGenContext.imlListCount == 0); + entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) { @@ -359,7 +372,7 @@ void PPCRecompiler_recompileAtAddress(uint32 address) PPCRecompilerState.recompilerSpinlock.unlock(); std::vector> functionEntryPoints; - auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints); + auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints, funcBoundaries); if (!func) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index e943d8d3..10cd0aa0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -31,9 +31,12 @@ struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct pp struct ppcImlGenContext_t { + class PPCFunctionBoundaryTracker* boundaryTracker; PPCRecFunction_t* functionRef; uint32* currentInstruction; uint32 ppcAddressOfCurrentInstruction; + IMLSegment* currentOutputSegment; + struct PPCBasicBlockInfo* currentBasicBlock{}; // fpr mode bool LSQE{ true }; bool PSE{ true }; @@ -82,6 +85,31 @@ struct ppcImlGenContext_t { return *PPCRecompilerImlGen_generateNewEmptyInstruction(this); } + + IMLSegment* NewSegment() + { + IMLSegment* seg = new IMLSegment(); + segmentList2.emplace_back(seg); + return seg; + } + + size_t GetSegmentIndex(IMLSegment* seg) + { + for (size_t i = 0; i < segmentList2.size(); i++) + { + if (segmentList2[i] == seg) + return i; + } + cemu_assert_error(); + return 0; + } + + IMLSegment* InsertSegment(size_t index) + { + IMLSegment* newSeg = new IMLSegment(); + segmentList2.insert(segmentList2.begin() + index, 1, newSeg); + return newSeg; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 3b8783f5..0521c440 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -1,7 +1,12 @@ #define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example) -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses); +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* 
PPCRecFunction, std::set& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker); + +IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); + +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext); IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index a1cb6f2e..6d488b17 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1,28 +1,74 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" +#include "Cafe/HW/Espresso/EspressoISA.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "IML/IML.h" #include "IML/IMLRegisterAllocatorRanges.h" +#include "PPCFunctionBoundaryTracker.h" + +struct PPCBasicBlockInfo +{ + PPCBasicBlockInfo(uint32 startAddress, const std::set& entryAddresses) : startAddress(startAddress), lastAddress(startAddress) + { + isEnterable = entryAddresses.find(startAddress) != entryAddresses.end(); + } + + uint32 startAddress; + uint32 lastAddress; // inclusive + bool isEnterable{ false }; + //uint32 enterableAddress{}; -> covered by startAddress + bool hasContinuedFlow{ true }; // non-branch path goes to next segment (lastAddress+4), assumed by default + bool hasBranchTarget{ false }; + uint32 branchTarget{}; + + // associated IML segments + IMLSegment* firstSegment{}; // first segment in chain, used as branch target for other segments + IMLSegment* appendSegment{}; // last segment in chain, new instructions should be appended to this segment + + void SetInitialSegment(IMLSegment* seg) + { + cemu_assert_debug(!firstSegment); + cemu_assert_debug(!appendSegment); + firstSegment = seg; + appendSegment = seg; + } + + IMLSegment* GetFirstSegmentInChain() + { + return firstSegment; + } + + IMLSegment* GetSegmentForInstructionAppend() + { + return appendSegment; + } +}; bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset); IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) { - if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) - { - sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; - ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); - ppcImlGenContext->imlListSize = newSize; - } - IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; - memset(imlInstruction, 0x00, sizeof(IMLInstruction)); - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default - imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; - ppcImlGenContext->imlListCount++; - return imlInstruction; + //if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) + //{ + // 
sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; + // ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); + // ppcImlGenContext->imlListSize = newSize; + //} + //IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; + //memset(imlInstruction, 0x00, sizeof(IMLInstruction)); + //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default + //imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; + //ppcImlGenContext->imlListCount++; + //return imlInstruction; + + IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); + memset(&inst, 0x00, sizeof(IMLInstruction)); + inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default +//imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; + + return &inst; } void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) @@ -109,6 +155,8 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress) { + __debugbreak(); + // jump if (imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -168,10 +216,13 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { + __debugbreak(); + // conditional jump IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; imlInstruction->op_conditionalJump.condition = jumpCondition; imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; @@ -179,6 +230,19 @@ void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; } +void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) +{ + // conditional jump + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + imlInstruction->type = PPCREC_IML_TYPE_CJUMP; + imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; + imlInstruction->op_conditionalJump.condition = jumpCondition; + imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; + imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; + imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; +} + void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { // 
load from memory @@ -363,7 +427,13 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // split before and after to make sure the macro is in an isolated segment that we can make enterable + PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + ppcImlGenContext->currentOutputSegment->SetEnterable(ppcImlGenContext->ppcAddressOfCurrentInstruction); PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); + IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + middleSeg->SetLinkBranchTaken(nullptr); + middleSeg->SetLinkBranchNotTaken(nullptr); } bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -417,6 +487,9 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PPCRecompilerImlGen_MFTB(): Not supported\n"); + return false; + uint32 rD, spr1, spr2, spr; PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); @@ -426,6 +499,8 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // TBL / TBU uint32 param2 = spr | (rD << 16); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0); + IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + return true; } return false; @@ -560,7 +635,7 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin ppcImlGenContext->cyclesSinceLastBranch++; if (PPCRecompiler_decodePPCInstruction(ppcImlGenContext)) { - assert_dbg(); + cemu_assert_suspicious(); } } // add range @@ -582,33 +657,17 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { // function call - // check if function can be inlined - sint32 inlineFuncInstructionCount = 0; - if (PPCRecompiler_canInlineFunction(jumpAddressDest, &inlineFuncInstructionCount)) - { - // generate NOP iml instead of BL macro (this assures that segment PPC range remains intact) - PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext, NULL); - //cemuLog_log(LogType::Force, "Inline func 0x{:08x} at {:08x}", jumpAddressDest, ppcImlGenContext->ppcAddressOfCurrentInstruction); - uint32* prevInstructionPtr = ppcImlGenContext->currentInstruction; - ppcImlGenContext->currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(jumpAddressDest); - PPCRecompiler_generateInlinedCode(ppcImlGenContext, jumpAddressDest, inlineFuncInstructionCount); - ppcImlGenContext->currentInstruction = prevInstructionPtr; - return true; - } - // generate funtion call instructions ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); return true; } // is jump destination within recompiled function? 
- if( jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize) ) + if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) { - // generate instruction - PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext, NULL, jumpAddressDest); + // jump to target within same function + PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); } else { - // todo: Inline this jump destination if possible (in many cases it's a bunch of GPR/FPR store instructions + BLR) ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); } return true; @@ -616,6 +675,8 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); @@ -661,11 +722,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) else if( crBit == 3 ) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } - // generate instruction - //ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; } return false; @@ -678,9 +738,9 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC); if( decrementerMustBeZero ) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); else - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); return true; } else @@ -688,8 +748,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) 
if( ignoreCondition ) { // branch always, no condition and no decrementer - debugBreakpoint(); - crRegister = PPC_REC_INVALID_REGISTER; // not necessary but lets optimizer know we dont care for cr register on this instruction + // not supported + return false; } else { @@ -717,17 +777,20 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } - if (jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize)) + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, jumpCondition, crRegister, crBit, conditionMustBeTrue); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); } else { // far jump + debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet"); + return false; + PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); + //ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } } } @@ -736,6 +799,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); @@ -750,7 +815,7 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool ignoreCondition = (BO&16)!=0; bool saveLR = (opcode&PPC_OPC_LK)!=0; // since we skip this instruction if the condition is true, we need to invert the logic - bool invertedConditionMustBeTrue = !conditionMustBeTrue; + //bool invertedConditionMustBeTrue = !conditionMustBeTrue; if( useDecrementer ) { cemu_assert_debug(false); @@ -760,28 +825,37 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { if( ignoreCondition ) { - // store LR + // branch always, no condition and no decrementer check + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); if( saveLR ) { ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); } else { - // branch always, no condition and no decrementer ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } else { + cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); + + //debug_printf("[Rec-Disable] BCLR with condition or LR\n"); + //return false; + // store LR if( saveLR ) { + 
cemu_assert_unimplemented(); // todo - this is difficult to handle because it needs to jump to the unmodified LR (we should cache it in a register which we pass to the macro?) + return false; + uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); } // generate jump condition - if( invertedConditionMustBeTrue ) + if(conditionMustBeTrue) { if( crBit == 0 ) jumpCondition = PPCREC_JUMP_CONDITION_L; @@ -803,9 +877,17 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod else if( crBit == 3 ) jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; } - // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + + //if(conditionMustBeTrue) + // ppcImlGenContext->emitInst().make_debugbreak(ppcImlGenContext->ppcAddressOfCurrentInstruction); + + // write the BCTR instruction to a new segment that is set as a branch target for the current segment + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + + bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } return true; @@ -813,6 +895,8 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); @@ -826,6 +910,7 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 bool ignoreCondition = (BO&16)!=0; bool saveLR = (opcode&PPC_OPC_LK)!=0; + // since we skip this instruction if the condition is true, we need to invert the logic bool invertedConditionMustBeTrue = !conditionMustBeTrue; if( useDecrementer ) @@ -839,51 +924,63 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { if( ignoreCondition ) { - // store LR + // branch always, no condition and no decrementer if( saveLR ) { uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + } + if (saveLR) ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - 
ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); - } else - { - // branch always, no condition and no decrementer ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } } else { - // store LR - if( saveLR ) + // get jump condition + if (invertedConditionMustBeTrue) { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } - // generate jump condition - if( invertedConditionMustBeTrue ) - { - if( crBit == 0 ) + if (crBit == 0) jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) + else if (crBit == 1) jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) + else if (crBit == 2) jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) + else if (crBit == 3) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } else { - if( crBit == 0 ) + if (crBit == 0) jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) + else if (crBit == 1) jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) + else if (crBit == 2) jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) + else if (crBit == 3) jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; } + + // debug checks + //if (saveLR) + // cemu_assert_debug(ppcImlGenContext->currentBasicBlock->); + + // we always store LR + if (saveLR) + { + uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction + 4) & 0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + } + + // write the BCTR instruction to a new segment that is set as a branch target for the current segment + __debugbreak(); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + + //PPCBasicBlockInfo* bctrSeg = currentBasicBlock->Get + __debugbreak(); + + // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); @@ -2915,12 +3012,6 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont return v; } -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset) -{ - uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction + offset/4)); - return v; -} - uint32 PPCRecompiler_getCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext) { uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction)); @@ -3864,268 +3955,884 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) return unsupportedInstructionFound; } -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& 
ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses) +// returns false if code flow is not interrupted +// continueDefaultPath: Controls if +bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget) { - ppcImlGenContext.functionRef = ppcRecFunc; + hasBranchTarget = false; + branchTarget = 0xFFFFFFFF; + makeNextInstEnterable = false; + continueDefaultPath = false; + switch (Espresso::GetPrimaryOpcode(opcode)) + { + case Espresso::PrimaryOpcode::VIRTUAL_HLE: + { + makeNextInstEnterable = true; + hasBranchTarget = false; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::BC: + { + uint32 BD, BI; + Espresso::BOField BO; + bool AA, LK; + Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK); + if (!LK) + { + hasBranchTarget = true; + branchTarget = (AA ? BD : BD) + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump + } + makeNextInstEnterable = LK; + continueDefaultPath = true; + return true; + } + case Espresso::PrimaryOpcode::B: + { + uint32 LI; + bool AA, LK; + Espresso::decodeOp_B(opcode, LI, AA, LK); + if (!LK) + { + hasBranchTarget = true; + branchTarget = AA ? LI : LI + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump + } + makeNextInstEnterable = LK; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::GROUP_19: + switch (Espresso::GetGroup19Opcode(opcode)) + { + //case Espresso::Opcode19::BCLR: + ////case Espresso::Opcode19::BCCTR: + //{ + // continueDefaultPath = false; // todo - set this to true if this instruction has a condition (including decrementer check) + // makeNextInstEnterable = Espresso::DecodeLK(opcode); + // return true; + //} + + case Espresso::Opcode19::BCLR: + case Espresso::Opcode19::BCCTR: + { + Espresso::BOField BO; + uint32 BI; + bool LK; + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); + continueDefaultPath = !BO.conditionIgnore() || !BO.decrementerIgnore(); // if branch is always taken then there is no continued path + makeNextInstEnterable = Espresso::DecodeLK(opcode); + return true; + } + default: + break; + } + break; + case Espresso::PrimaryOpcode::GROUP_31: + switch (Espresso::GetGroup31Opcode(opcode)) + { + //case Espresso::Opcode31::TW: + // continueDefaultPath = true; + // return true; + //case Espresso::Opcode31::MFTB: + // continueDefaultPath = true; + // return true; + //case Espresso::Opcode19::BCLR: + //case Espresso::Opcode19::BCCTR: + //{ + // continueDefaultPath = false; + // makeNextInstEnterable = Espresso::DecodeLK(opcode); + // return true; + //} + default: + break; + } + break; + default: + break; + } + return false; +} + +void PPCRecompiler_DetermineBasicBlockRange(std::vector& basicBlockList, PPCFunctionBoundaryTracker& boundaryTracker, uint32 ppcStart, uint32 ppcEnd, const std::set& combinedBranchTargets, const std::set& entryAddresses) +{ + cemu_assert_debug(ppcStart <= ppcEnd); + + uint32 currentAddr = ppcStart; + + PPCBasicBlockInfo* curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + + uint32 basicBlockStart = currentAddr; + while (currentAddr <= ppcEnd) + { + curBlockInfo->lastAddress = currentAddr; + uint32 opcode = memory_readU32(currentAddr); + bool nextInstIsEnterable = false; + bool hasBranchTarget = false; + bool hasContinuedFlow 
= false; + uint32 branchTarget = 0; + if (PPCRecompiler_CheckIfInstructionEndsSegment(boundaryTracker, currentAddr, opcode, nextInstIsEnterable, hasContinuedFlow, hasBranchTarget, branchTarget)) + { + curBlockInfo->hasBranchTarget = hasBranchTarget; + curBlockInfo->branchTarget = branchTarget; + curBlockInfo->hasContinuedFlow = hasContinuedFlow; + // start new basic block, except if this is the last instruction + if (currentAddr >= ppcEnd) + break; + curBlockInfo = &basicBlockList.emplace_back(currentAddr + 4, entryAddresses); + curBlockInfo->isEnterable = curBlockInfo->isEnterable || nextInstIsEnterable; + currentAddr += 4; + continue; + } + currentAddr += 4; + if (currentAddr <= ppcEnd) + { + if (combinedBranchTargets.find(currentAddr) != combinedBranchTargets.end()) + { + // instruction is branch target, start new basic block + curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + } + } + + } +} + +std::vector PPCRecompiler_DetermineBasicBlockRange(PPCFunctionBoundaryTracker& boundaryTracker, const std::set& entryAddresses) +{ + cemu_assert(!entryAddresses.empty()); + std::vector basicBlockList; + + const std::set branchTargets = boundaryTracker.GetBranchTargets(); + auto funcRanges = boundaryTracker.GetRanges(); + + std::set combinedBranchTargets = branchTargets; + combinedBranchTargets.insert(entryAddresses.begin(), entryAddresses.end()); + + for (auto& funcRangeIt : funcRanges) + PPCRecompiler_DetermineBasicBlockRange(basicBlockList, boundaryTracker, funcRangeIt.startAddress, funcRangeIt.startAddress + funcRangeIt.length - 4, combinedBranchTargets, entryAddresses); + + // mark all segments that start at entryAddresses as enterable (debug code for verification, can be removed) + size_t numMarkedEnterable = 0; + for (auto& basicBlockIt : basicBlockList) + { + if (entryAddresses.find(basicBlockIt.startAddress) != entryAddresses.end()) + { + cemu_assert_debug(basicBlockIt.isEnterable); + numMarkedEnterable++; + } + } + cemu_assert_debug(numMarkedEnterable == entryAddresses.size()); + + // todo - inline BL, currently this is done in the instruction handler of BL but this will mean that instruction cycle increasing is ignored + + return basicBlockList; +} + +bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + ppcImlGenContext.currentOutputSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + ppcImlGenContext.currentInstruction = (uint32*)(memory_base + basicBlockInfo.startAddress); + + uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; + uint32* endCurrentInstruction = (uint32*)(memory_base + basicBlockInfo.lastAddress); + + while (ppcImlGenContext.currentInstruction <= endCurrentInstruction) + { + uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); + ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; + if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext)) + { + debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction); + ppcImlGenContext.currentOutputSegment = nullptr; + return false; + } + } + ppcImlGenContext.currentOutputSegment = nullptr; + return true; +} + +// returns split segment from which the continued segment is available via seg->GetBranchNotTaken() +IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = 
basicBlockInfo.GetSegmentForInstructionAppend(); + + //IMLSegment* continuedSegment = ppcImlGenContext.NewSegment(); + IMLSegment* continuedSegment = ppcImlGenContext.InsertSegment(ppcImlGenContext.GetSegmentIndex(writeSegment) + 1); + + continuedSegment->SetLinkBranchTaken(writeSegment->GetBranchTaken()); + continuedSegment->SetLinkBranchNotTaken(writeSegment->GetBranchNotTaken()); + + writeSegment->SetLinkBranchNotTaken(continuedSegment); + writeSegment->SetLinkBranchTaken(nullptr); + + if (ppcImlGenContext.currentOutputSegment == writeSegment) + ppcImlGenContext.currentOutputSegment = continuedSegment; + + cemu_assert_debug(basicBlockInfo.appendSegment == writeSegment); + basicBlockInfo.appendSegment = continuedSegment; + + return writeSegment; +} + +// generates a new segment and sets it as branch target for the current write segment. Returns the created segment +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + IMLSegment* branchTargetSegment = ppcImlGenContext.NewSegment(); + cemu_assert_debug(!writeSegment->GetBranchTaken()); // must not have a target already + writeSegment->SetLinkBranchTaken(branchTargetSegment); + return branchTargetSegment; +} + +// verify that current instruction is the last instruction of the active basic block +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext) +{ + cemu_assert_debug(ppcImlGenContext.currentBasicBlock->lastAddress == ppcImlGenContext.ppcAddressOfCurrentInstruction); +} + +void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* imlSegment = basicBlockInfo.GetFirstSegmentInChain(); + //if (imlSegment->imlList.empty()) + // return; + //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) + // return; + //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) + // return; + if (!basicBlockInfo.hasBranchTarget) + return; + if (basicBlockInfo.branchTarget >= basicBlockInfo.startAddress) + return; + + // exclude non-infinite tight loops + if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) + return; + // potential loop segment found, split this segment into four: + // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) + // P1: This segment contains the ppc_leave instruction + // P2: This segment contains the iml instructions of the original segment + // PEntry: This segment is used to enter the function, it jumps to P0 + // All segments are considered to be part of the same PPC instruction range + // The first segment also retains the jump destination and enterable properties from the original segment. 
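The code that follows builds a smaller version of this split: as far as the new wiring suggests, the block's first segment is made enterable, the cycle-check jump is appended as a suffix whose branch-taken edge points at a fresh exit segment (a LEAVE back to the scheduler at the block's start address), and the fall-through edge continues into the block body. A toy model of that shape, with simplified stand-in types rather than the real IMLSegment API:

#include <cstdint>
#include <memory>
#include <vector>

struct ToySeg // hypothetical stand-in for IMLSegment
{
    std::vector<const char*> ops;     // placeholder for IML instructions
    ToySeg* branchTaken = nullptr;    // suffix jump target
    ToySeg* branchNotTaken = nullptr; // fall-through
    bool isEnterable = false;
    std::uint32_t enterAddress = 0;
};

// Wire up: [check] --taken--> [exitSeg: LEAVE(blockStart)]
//              \---not taken--> [body ...]
void InsertCycleCheck(std::vector<std::unique_ptr<ToySeg>>& segments, ToySeg* body, std::uint32_t blockStart)
{
    auto check = std::make_unique<ToySeg>();
    auto exitSeg = std::make_unique<ToySeg>();
    check->isEnterable = true;            // scheduler can resume the loop here later
    check->enterAddress = blockStart;
    check->ops.push_back("CJUMP_CYCLE_CHECK");
    check->branchTaken = exitSeg.get();   // cycle budget used up -> leave to scheduler
    check->branchNotTaken = body;         // cycles remaining -> run the loop body
    exitSeg->ops.push_back("MACRO_LEAVE");
    segments.push_back(std::move(check));
    segments.push_back(std::move(exitSeg));
}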
+ //debug_printf("--- Insert cycle counter check ---\n"); + + + // make the segment enterable so execution can return after checking + basicBlockInfo.GetFirstSegmentInChain()->SetEnterable(basicBlockInfo.startAddress); + + IMLSegment* splitSeg = PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext, basicBlockInfo); + + // what we know about the crash: + // It doesnt happen with cycle checks disabled + // The debugbreak emitted here is only encountered twice before it crashes + // it doesnt seem to go into the alternative branch (cycles negative) -> tested (debugbreak in exit segment doesnt trigger) + // Its the enterable segment that causes issues? -> I removed the enterable statement and it still happened + // Maybe some general issue with getting x64 offsets for enterable segments.. + + // possible explanations: + // issue with the cycle check / exit logic + // returning from exit is causing the issue + // Segments can get marked as jump destination which we no longer do -> Deleted old code and added asserts + + IMLInstruction* inst = splitSeg->AppendInstruction(); + inst->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + inst->operation = 0; + inst->crRegister = PPC_REC_INVALID_REGISTER; + inst->op_conditionalJump.jumpmarkAddress = 0xFFFFFFFF; + inst->associatedPPCAddress = 0xFFFFFFFF; + // PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK + + //splitSeg->AppendInstruction()->make_macro(PPCREC_IML_TYPE_MACRO, ) + + IMLSegment* exitSegment = ppcImlGenContext.NewSegment(); + splitSeg->SetLinkBranchTaken(exitSegment); + + + //exitSegment->AppendInstruction()->make_debugbreak(); + + inst = exitSegment->AppendInstruction();// ->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress); + inst->type = PPCREC_IML_TYPE_MACRO; + inst->operation = PPCREC_IML_MACRO_LEAVE; + inst->crRegister = PPC_REC_INVALID_REGISTER; + inst->op_macro.param = basicBlockInfo.startAddress; + inst->associatedPPCAddress = basicBlockInfo.startAddress; + + + //debug_printf("----------------------------------------\n"); + //IMLDebug_Dump(&ppcImlGenContext); + //__debugbreak(); + + //ppcImlGenContext.NewSegment(); + + //PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); + //imlSegment = NULL; + //IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; + //IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; + //IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; + //// create entry point segment + //PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + //IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; + //// relink segments + //IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); + //IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + //IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); + //IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); + //// update segments + //uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; + //if (imlSegmentP2->isEnterable) + // enterPPCAddress = imlSegmentP2->enterPPCAddress; + //imlSegmentP0->ppcAddress = 0xFFFFFFFF; + //imlSegmentP1->ppcAddress = 0xFFFFFFFF; + //imlSegmentP2->ppcAddress = 0xFFFFFFFF; + //cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); + //// move segment properties from segment P2 to segment P0 + //imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; + //imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; + //imlSegmentP0->isEnterable = false; + 
////imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; + //imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; + //imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; + //imlSegmentP2->isJumpDestination = false; + //imlSegmentP2->jumpDestinationPPCAddress = 0; + //imlSegmentP2->isEnterable = false; + //imlSegmentP2->enterPPCAddress = 0; + //imlSegmentP2->ppcAddrMin = 0; + //imlSegmentP2->ppcAddrMax = 0; + //// setup enterable segment + //if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) + //{ + // imlSegmentPEntry->isEnterable = true; + // imlSegmentPEntry->ppcAddress = enterPPCAddress; + // imlSegmentPEntry->enterPPCAddress = enterPPCAddress; + //} + //// assign new jumpmark to segment P2 + //imlSegmentP2->isJumpDestination = true; + //imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; + //currentLoopEscapeJumpMarker++; + //// create ppc_leave instruction in segment P1 + //PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); + //imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; + //imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; + //imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + //imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; + //imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + //// create cycle-based conditional instruction in segment P0 + //PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); + //imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + //imlSegmentP0->imlList[0].operation = 0; + //imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + //imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; + //imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + //// jump instruction for PEntry + //PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); + //PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); +} + +void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext) +{ + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + bool isLastSegment = segIt == ppcImlGenContext.segmentList2.back(); + //IMLSegment* nextSegment = isLastSegment ? 
nullptr : ppcImlGenContext->segmentList2[s + 1]; + // handle empty segment + if (segIt->imlList.empty()) + { + cemu_assert_debug(segIt->GetBranchNotTaken()); + continue; + } + // check last instruction of segment + IMLInstruction* imlInstruction = segIt->GetLastInstruction(); + if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + cemu_assert_debug(segIt->GetBranchTaken()); + if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + { + cemu_assert_debug(segIt->GetBranchNotTaken()); + } + + //// find destination segment by ppc jump address + //IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); + //if (jumpDestSegment) + //{ + // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + // IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); + //} + //else + //{ + // imlSegment->nextSegmentIsUncertain = true; + //} + } + else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) + { + auto macroType = imlInstruction->operation; + switch (macroType) + { + case PPCREC_IML_MACRO_BLR: + case PPCREC_IML_MACRO_BLRL: + case PPCREC_IML_MACRO_BCTR: + case PPCREC_IML_MACRO_BCTRL: + case PPCREC_IML_MACRO_BL: + case PPCREC_IML_MACRO_B_FAR: + case PPCREC_IML_MACRO_HLE: + case PPCREC_IML_MACRO_LEAVE: + segIt->nextSegmentIsUncertain = true; + break; + case PPCREC_IML_MACRO_DEBUGBREAK: + case PPCREC_IML_MACRO_COUNT_CYCLES: + case PPCREC_IML_MACRO_MFTB: + break; + default: + cemu_assert_unimplemented(); + } + } + } +} + +bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunctionBoundaryTracker& boundaryTracker, std::set& entryAddresses) +{ + std::vector basicBlockList = PPCRecompiler_DetermineBasicBlockRange(boundaryTracker, entryAddresses); + + // create segments + std::unordered_map addrToBB; + ppcImlGenContext.segmentList2.resize(basicBlockList.size()); + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = new IMLSegment(); + seg->ppcAddress = basicBlockInfo.startAddress; + if(basicBlockInfo.isEnterable) + seg->SetEnterable(basicBlockInfo.startAddress); + ppcImlGenContext.segmentList2[i] = seg; + cemu_assert_debug(addrToBB.find(basicBlockInfo.startAddress) == addrToBB.end()); + basicBlockInfo.SetInitialSegment(seg); + addrToBB.emplace(basicBlockInfo.startAddress, &basicBlockInfo); + } + // link segments + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& bbInfo = basicBlockList[i]; + cemu_assert_debug(bbInfo.GetFirstSegmentInChain() == bbInfo.GetSegmentForInstructionAppend()); + IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + if (bbInfo.hasBranchTarget) + { + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.branchTarget]; + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchTaken(seg, targetBB->GetFirstSegmentInChain()); + } + if (bbInfo.hasContinuedFlow) + { + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.lastAddress + 4]; + if (!targetBB) + { + cemuLog_log(LogType::Recompiler, "Recompiler was unable to link segment [0x{:08x}-0x{:08x}] to 0x{:08x}", bbInfo.startAddress, bbInfo.lastAddress, bbInfo.lastAddress + 4); + return false; + } + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchNotTaken(seg, targetBB->GetFirstSegmentInChain()); + } + } + // we assume that all unreachable segments are 
potentially enterable + // todo - mark them as such + + + // generate cycle counters + // in theory we could generate these as part of FillBasicBlock() but in the future we might use more complex logic to emit fewer operations + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = basicBlockInfo.GetSegmentForInstructionAppend(); + + uint32 ppcInstructionCount = (basicBlockInfo.lastAddress - basicBlockInfo.startAddress + 4) / 4; + cemu_assert_debug(ppcInstructionCount > 0); + + PPCRecompiler_pushBackIMLInstructions(seg, 0, 1); + seg->imlList[0].type = PPCREC_IML_TYPE_MACRO; + seg->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + seg->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + seg->imlList[0].op_macro.param = ppcInstructionCount; + } + + // generate cycle check instructions + // note: Introduces new segments + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext, basicBlockInfo); + } + + // fill in all the basic blocks + // note: This step introduces new segments as is necessary for some instructions + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + ppcImlGenContext.currentBasicBlock = &basicBlockInfo; + if (!PPCIMLGen_FillBasicBlock(ppcImlGenContext, basicBlockInfo)) + return false; + ppcImlGenContext.currentBasicBlock = nullptr; + } + + // mark segments with unknown jump destination (e.g. BLR and most macros) + PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext); + + // debug - check segment graph +#ifdef CEMU_DEBUG_ASSERT + //for (size_t i = 0; i < basicBlockList.size(); i++) + //{ + // IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + // if (seg->list_prevSegments.empty()) + // { + // cemu_assert_debug(seg->isEnterable); + // } + //} + // debug - check if suffix instructions are at the end of segments and if they are present for branching segments + for (size_t segIndex = 0; segIndex < ppcImlGenContext.segmentList2.size(); segIndex++) + { + IMLSegment* seg = ppcImlGenContext.segmentList2[segIndex]; + IMLSegment* nextSeg = (segIndex+1) < ppcImlGenContext.segmentList2.size() ? 
ppcImlGenContext.segmentList2[segIndex + 1] : nullptr; + + if (seg->imlList.size() > 0) + { + for (size_t f = 0; f < seg->imlList.size() - 1; f++) + { + if (seg->imlList[f].IsSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (Suffix instruction at wrong pos in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + __debugbreak(); + } + } + } + if (seg->nextSegmentBranchTaken) + { + if (!seg->HasSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (NoSuffixInstruction in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + __debugbreak(); + } + } + if (seg->nextSegmentBranchNotTaken) + { + // if branch not taken, flow must continue to next segment in sequence + cemu_assert_debug(seg->nextSegmentBranchNotTaken == nextSeg); + } + // more detailed checks based on actual suffix instruction + if (seg->imlList.size() > 0) + { + IMLInstruction* inst = seg->GetLastInstruction(); + if (inst->type == PPCREC_IML_TYPE_MACRO && inst->op_macro.param == PPCREC_IML_MACRO_B_FAR) + { + cemu_assert_debug(!seg->GetBranchTaken()); + cemu_assert_debug(!seg->GetBranchNotTaken()); + } + if (inst->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + cemu_assert_debug(seg->GetBranchTaken()); + cemu_assert_debug(seg->GetBranchNotTaken()); + } + if (inst->type == PPCREC_IML_TYPE_CJUMP) + { + if (inst->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + { + if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) + { + debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + cemu_assert_error(); + } + } + else + { + // proper error checking for branch-always (or branch-never if invert bit is set) + } + } + } + //if (seg->list_prevSegments.empty()) + //{ + // cemu_assert_debug(seg->isEnterable); + //} + segIndex++; + } +#endif + + + // todos: + // - basic block determination should look for the B(L) B(L) pattern. 
Or maybe just mark every bb without any input segments as an entry segment + + return true; +} + +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) +{ + ppcImlGenContext.functionRef = ppcRecFunc; // todo - remove this and replace internally with boundary tracker + ppcImlGenContext.boundaryTracker = &boundaryTracker; + + if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) + return false; + // add entire range ppcRecRange_t recRange; recRange.ppcAddress = ppcRecFunc->ppcAddress; recRange.ppcSize = ppcRecFunc->ppcSize; ppcRecFunc->list_ranges.push_back(recRange); // process ppc instructions - ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); - bool unsupportedInstructionFound = false; - sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; - sint32 unsupportedInstructionCount = 0; - uint32 unsupportedInstructionLastOffset = 0; - uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; - uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; - - while(ppcImlGenContext.currentInstruction < endCurrentInstruction) - { - uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); - ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; - ppcImlGenContext.cyclesSinceLastBranch++; - ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); - if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) - { - // add PPCEnter for addresses that are in entryAddresses - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) - { - // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional jump instructions without jump goal) - uint32 opcodeCurrent = PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); - uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); - if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) - { - // between two B(L) instructions - // todo: for BL only if they are not inlineable - - bool canInlineFunction = false; - if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) - { - uint32 li; - PPC_OPC_TEMPL_I(opcodePrevious, li); - sint32 inlineSize = 0; - if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) - canInlineFunction = true; - } - if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) - { - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); - if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) - { - // after unconditional BCTR instruction - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - } - } - - unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); - if( unsupportedInstructionFound ) - { - unsupportedInstructionCount++; - unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; - unsupportedInstructionFound = false; - //break; - } - } - ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current 
instruction offset (any future generated IML instruction will be assigned to ppc address 0) - if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) - { - debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); - return false; - } - // optimize unused jumpmarks away - // first, flag all jumpmarks as unused - std::map map_jumpMarks; - for(sint32 i=0; isecond->op_jumpmark.flags &= ~PPCREC_IML_OP_FLAG_UNUSED; - } - } - // lastly, remove jumpmarks that still have the unused flag set - sint32 currentImlIndex = 0; - for(sint32 i=0; i end of segment after current instruction - // If we encounter a jumpmark -> end of segment before current instruction - // If we encounter ppc_enter -> end of segment before current instruction - if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) - { - // segment ends after current instruction - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart+1; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex+1; - } - else if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || - ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // segment ends before current instruction - if( segmentImlIndex > segmentStart ) - { - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; - } - } - segmentImlIndex++; - } - if( segmentImlIndex != segmentStart ) - { - // final segment - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; - } - // move iml instructions into the segments - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - uint32 imlStartIndex = segIt->startOffset; - uint32 imlCount = segIt->count; - if( imlCount > 0 ) - { - cemu_assert_debug(segIt->imlList.empty()); - 
segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); - - } - else - { - // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code - cemu_assert_debug(segIt->imlList.empty()); - } - segIt->startOffset = 9999999; - segIt->count = 9999999; - } - // clear segment-independent iml list - free(ppcImlGenContext.imlList); - ppcImlGenContext.imlList = nullptr; - ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList - // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - uint32 segmentPPCAddrMin = 0xFFFFFFFF; - uint32 segmentPPCAddrMax = 0x00000000; - for(sint32 i=0; i< segIt->imlList.size(); i++) - { - if(segIt->imlList[i].associatedPPCAddress == 0 ) - continue; - //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) - // continue; // jumpmarks and no-op instructions must not affect segment ppc address range - segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); - segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); - } - if( segmentPPCAddrMin != 0xFFFFFFFF ) - { - segIt->ppcAddrMin = segmentPPCAddrMin; - segIt->ppcAddrMax = segmentPPCAddrMax; - } - else - { - segIt->ppcAddrMin = 0; - segIt->ppcAddrMax = 0; - } - } - // certain instructions can change the segment state - // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) - // jumpmarks mark the segment as a jump destination (within the same function) - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - while (segIt->imlList.size() > 0) - { - if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) - { - // mark segment as enterable - if (segIt->isEnterable) - assert_dbg(); // should not happen? - segIt->isEnterable = true; - segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; - // remove ppc_enter instruction - segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].associatedPPCAddress = 0; - } - else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) - { - // mark segment as jump destination - if(segIt->isJumpDestination ) - assert_dbg(); // should not happen? 
- segIt->isJumpDestination = true; - segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; - // remove jumpmark instruction - segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].associatedPPCAddress = 0; - } - else - break; - } - } - // the first segment is always enterable as the recompiled functions entrypoint - ppcImlGenContext.segmentList2[0]->isEnterable = true; - ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; - - // link segments for further inter-segment optimization - PPCRecompilerIML_linkSegments(&ppcImlGenContext); +// ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); +// bool unsupportedInstructionFound = false; +// sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; +// sint32 unsupportedInstructionCount = 0; +// uint32 unsupportedInstructionLastOffset = 0; +// uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; +// uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; +// +// while(ppcImlGenContext.currentInstruction < endCurrentInstruction) +// { +// uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); +// ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; +// ppcImlGenContext.cyclesSinceLastBranch++; +// ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); +// if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) +// { +// // add PPCEnter for addresses that are in entryAddresses +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) +// { +// // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional jump instructions without jump goal) +// uint32 opcodeCurrent = PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); +// uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); +// if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) +// { +// // between two B(L) instructions +// // todo: for BL only if they are not inlineable +// +// bool canInlineFunction = false; +// if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) +// { +// uint32 li; +// PPC_OPC_TEMPL_I(opcodePrevious, li); +// sint32 inlineSize = 0; +// if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) +// canInlineFunction = true; +// } +// if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) +// { +// uint32 BO, BI, BD; +// PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); +// if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) +// { +// // after unconditional BCTR instruction +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// } +// } +// +// unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); +// if( unsupportedInstructionFound ) +// { +// unsupportedInstructionCount++; +// unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; +// unsupportedInstructionFound = false; +// //break; +// } +// } +// 
ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) +// if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) +// { +// debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); +// return false; +// } +// // optimize unused jumpmarks away +// // first, flag all jumpmarks as unused +// std::map map_jumpMarks; +// for(sint32 i=0; isecond->op_jumpmark.flags &= ~PPCREC_IML_OP_FLAG_UNUSED; +// } +// } +// // lastly, remove jumpmarks that still have the unused flag set +// sint32 currentImlIndex = 0; +// for(sint32 i=0; i end of segment after current instruction +// // If we encounter a jumpmark -> end of segment before current instruction +// // If we encounter ppc_enter -> end of segment before current instruction +// if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) +// { +// // segment ends after current instruction +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart+1; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex+1; +// } +// else if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || +// ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) +// { +// // segment ends before current instruction +// if( segmentImlIndex > segmentStart ) +// { +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex; +// } +// } +// segmentImlIndex++; +// } +// if( segmentImlIndex != segmentStart ) +// { +// // final segment +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex; +// } +// // move iml instructions into the segments +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) 
+// { +// uint32 imlStartIndex = segIt->startOffset; +// uint32 imlCount = segIt->count; +// if( imlCount > 0 ) +// { +// cemu_assert_debug(segIt->imlList.empty()); +// segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); +// +// } +// else +// { +// // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code +// cemu_assert_debug(segIt->imlList.empty()); +// } +// segIt->startOffset = 9999999; +// segIt->count = 9999999; +// } +// // clear segment-independent iml list +// free(ppcImlGenContext.imlList); +// ppcImlGenContext.imlList = nullptr; +// ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList +// // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// uint32 segmentPPCAddrMin = 0xFFFFFFFF; +// uint32 segmentPPCAddrMax = 0x00000000; +// for(sint32 i=0; i< segIt->imlList.size(); i++) +// { +// if(segIt->imlList[i].associatedPPCAddress == 0 ) +// continue; +// //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) +// // continue; // jumpmarks and no-op instructions must not affect segment ppc address range +// segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); +// segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); +// } +// if( segmentPPCAddrMin != 0xFFFFFFFF ) +// { +// segIt->ppcAddrMin = segmentPPCAddrMin; +// segIt->ppcAddrMax = segmentPPCAddrMax; +// } +// else +// { +// segIt->ppcAddrMin = 0; +// segIt->ppcAddrMax = 0; +// } +// } +// // certain instructions can change the segment state +// // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) +// // jumpmarks mark the segment as a jump destination (within the same function) +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// while (segIt->imlList.size() > 0) +// { +// if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) +// { +// // mark segment as enterable +// if (segIt->isEnterable) +// assert_dbg(); // should not happen? +// segIt->isEnterable = true; +// segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; +// // remove ppc_enter instruction +// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; +// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; +// segIt->imlList[0].associatedPPCAddress = 0; +// } +// else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) +// { +// // mark segment as jump destination +// if(segIt->isJumpDestination ) +// assert_dbg(); // should not happen? 
+// segIt->isJumpDestination = true; +// segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; +// // remove jumpmark instruction +// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; +// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; +// segIt->imlList[0].associatedPPCAddress = 0; +// } +// else +// break; +// } +// } +// // the first segment is always enterable as the recompiled functions entrypoint +// ppcImlGenContext.segmentList2[0]->isEnterable = true; +// ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; +// +// // link segments for further inter-segment optimization +// PPCRecompilerIML_linkSegments(&ppcImlGenContext); // optimization pass - replace segments with conditional MOVs if possible for (IMLSegment* segIt : ppcImlGenContext.segmentList2) @@ -4215,129 +4922,132 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert cycle counter instruction in every segment that has a cycle count greater zero - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if( segIt->ppcAddrMin == 0 ) - continue; - // count number of PPC instructions in segment - // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions - uint32 lastPPCInstAddr = 0; - uint32 ppcCount2 = 0; - for (sint32 i = 0; i < segIt->imlList.size(); i++) - { - if (segIt->imlList[i].associatedPPCAddress == 0) - continue; - if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) - continue; - lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; - ppcCount2++; - } - //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions - uint32 cycleCount = ppcCount2;// ppcCount / 4; - if( cycleCount > 0 ) - { - PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); - segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - segIt->imlList[0].op_macro.param = cycleCount; - } - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if( segIt->ppcAddrMin == 0 ) + // continue; + // // count number of PPC instructions in segment + // // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions + // uint32 lastPPCInstAddr = 0; + // uint32 ppcCount2 = 0; + // for (sint32 i = 0; i < segIt->imlList.size(); i++) + // { + // if (segIt->imlList[i].associatedPPCAddress == 0) + // continue; + // if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) + // continue; + // lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; + // ppcCount2++; + // } + // //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions + // uint32 cycleCount = ppcCount2;// ppcCount / 4; + // if( cycleCount > 0 ) + // { + // PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); + // segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; + // segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + // segIt->imlList[0].op_macro.param = cycleCount; + // } + //} return true; } void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext) { - // find segments that have a (conditional) jump instruction that points in reverse direction of code flow - // for these segments there 
is a risk that the recompiler could get trapped in an infinite busy loop.
-	// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example)
-	uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located
-	for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++)
-	{
-		// todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?)
-		IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s];
-		if (imlSegment->imlList.empty())
-			continue;
-		if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin)
-			continue;
-		if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment)
-			continue;
-		// exclude non-infinite tight loops
-		if (IMLAnalyzer_IsTightFiniteLoop(imlSegment))
-			continue;
-		// potential loop segment found, split this segment into four:
-		// P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments)
-		// P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here.
-		// P2: This segment contains the iml instructions of the original segment
-		// PEntry: This segment is used to enter the function, it jumps to P0
-		// All segments are considered to be part of the same PPC instruction range
-		// The first segment also retains the jump destination and enterable properties from the original segment.
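The four-way split described above is essentially a relinking of segment edges. The sketch below restates only that topology; Segment and SplitLoopSegment are simplified stand-ins invented for illustration, while the real pass operated on IMLSegment via IMLSegment_RelinkInputSegment, IMLSegment_SetLinkBranchTaken and IMLSegment_SetLinkBranchNotTaken, as the removed code below shows.

// Simplified stand-in for the segment link fields used by the (now removed) pass.
struct Segment
{
	Segment* branchTaken = nullptr;    // successor when the terminating check/jump is taken
	Segment* branchNotTaken = nullptr; // fall-through successor
	bool isCycleCheck = false;         // P0: "remaining cycles still above zero?"
	bool isLeave = false;              // P1: single ppc_leave, returns to the scheduler
};

// PEntry -> P0; P0 -> P2 while cycles remain, otherwise P0 -> P1 (leave); P2 keeps the
// original instructions. The removed code additionally redirected the original segment's
// incoming edges through the check via IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0).
static void SplitLoopSegment(Segment& pEntry, Segment& p0, Segment& p1, Segment& p2)
{
	p0.isCycleCheck = true;
	p1.isLeave = true;
	p0.branchTaken = &p2;     // cycles left: continue with the loop body
	p0.branchNotTaken = &p1;  // budget exhausted: leave the recompiled function
	pEntry.branchTaken = &p0; // function entry funnels through the cycle check
}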
- //debug_printf("--- Insert cycle counter check ---\n"); + return; // deprecated - PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); - imlSegment = NULL; - IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; - IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; - IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; - // create entry point segment - PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); - IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; - // relink segments - IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); - IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); - IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); - // update segments - uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; - if (imlSegmentP2->isEnterable) - enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddress = 0xFFFFFFFF; - imlSegmentP1->ppcAddress = 0xFFFFFFFF; - imlSegmentP2->ppcAddress = 0xFFFFFFFF; - cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); - // move segment properties from segment P2 to segment P0 - imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; - imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->isEnterable = false; - //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; - imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; - imlSegmentP2->isJumpDestination = false; - imlSegmentP2->jumpDestinationPPCAddress = 0; - imlSegmentP2->isEnterable = false; - imlSegmentP2->enterPPCAddress = 0; - imlSegmentP2->ppcAddrMin = 0; - imlSegmentP2->ppcAddrMax = 0; - // setup enterable segment - if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) - { - imlSegmentPEntry->isEnterable = true; - imlSegmentPEntry->ppcAddress = enterPPCAddress; - imlSegmentPEntry->enterPPCAddress = enterPPCAddress; - } - // assign new jumpmark to segment P2 - imlSegmentP2->isJumpDestination = true; - imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; - currentLoopEscapeJumpMarker++; - // create ppc_leave instruction in segment P1 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); - imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; - imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; - imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; - imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // create cycle-based conditional instruction in segment P0 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); - imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - imlSegmentP0->imlList[0].operation = 0; - imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // jump instruction for PEntry - PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); + //// find segments that have a (conditional) jump instruction that points in reverse direction of code flow + //// for these 
segments there is a risk that the recompiler could get trapped in an infinite busy loop. + //// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) + //uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located + //for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++) + //{ + // // todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) + // IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; + // if (imlSegment->imlList.empty()) + // continue; + // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) + // continue; + // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) + // continue; - // skip the newly created segments - s += 2; - } + // // exclude non-infinite tight loops + // if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) + // continue; + // // potential loop segment found, split this segment into four: + // // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) + // // P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here. + // // P2: This segment contains the iml instructions of the original segment + // // PEntry: This segment is used to enter the function, it jumps to P0 + // // All segments are considered to be part of the same PPC instruction range + // // The first segment also retains the jump destination and enterable properties from the original segment. 
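Stated as a predicate, the detection above asks whether a segment ends in a PPC-address based conditional jump whose target lies at or before the segment's own ppcAddrMin (a back-edge) and is not a provably finite tight loop. Here is a hedged, self-contained restatement with simplified types; IsTightFiniteLoop is a stub standing in for IMLAnalyzer_IsTightFiniteLoop and NeedsCycleCheck is an invented name.

#include <cstdint>
#include <vector>

struct Instr                             // stand-in for IMLInstruction
{
	bool isConditionalJump = false;      // corresponds to PPCREC_IML_TYPE_CJUMP
	bool jumpAccordingToSegment = false; // segment-based jumps are not considered here
	uint32_t jumpmarkAddress = 0;
};

struct Segment                           // stand-in for IMLSegment
{
	std::vector<Instr> imlList;
	uint32_t ppcAddrMin = 0;
};

static bool IsTightFiniteLoop(const Segment&) { return false; } // placeholder

static bool NeedsCycleCheck(const Segment& seg)
{
	if (seg.imlList.empty())
		return false;
	const Instr& last = seg.imlList.back();
	if (!last.isConditionalJump || last.jumpAccordingToSegment)
		return false;
	if (last.jumpmarkAddress > seg.ppcAddrMin)
		return false; // forward branch, not a loop back-edge
	return !IsTightFiniteLoop(seg);
}

Segments for which this returns true are the ones the pass would split into P0/P1/P2/PEntry.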
+ // //debug_printf("--- Insert cycle counter check ---\n"); + + // PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); + // imlSegment = NULL; + // IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; + // IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; + // IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; + // // create entry point segment + // PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + // IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; + // // relink segments + // IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); + // IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + // IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); + // IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); + // // update segments + // uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; + // if (imlSegmentP2->isEnterable) + // enterPPCAddress = imlSegmentP2->enterPPCAddress; + // imlSegmentP0->ppcAddress = 0xFFFFFFFF; + // imlSegmentP1->ppcAddress = 0xFFFFFFFF; + // imlSegmentP2->ppcAddress = 0xFFFFFFFF; + // cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); + // // move segment properties from segment P2 to segment P0 + // imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; + // imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; + // imlSegmentP0->isEnterable = false; + // //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; + // imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; + // imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; + // imlSegmentP2->isJumpDestination = false; + // imlSegmentP2->jumpDestinationPPCAddress = 0; + // imlSegmentP2->isEnterable = false; + // imlSegmentP2->enterPPCAddress = 0; + // imlSegmentP2->ppcAddrMin = 0; + // imlSegmentP2->ppcAddrMax = 0; + // // setup enterable segment + // if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) + // { + // imlSegmentPEntry->isEnterable = true; + // imlSegmentPEntry->ppcAddress = enterPPCAddress; + // imlSegmentPEntry->enterPPCAddress = enterPPCAddress; + // } + // // assign new jumpmark to segment P2 + // imlSegmentP2->isJumpDestination = true; + // imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; + // currentLoopEscapeJumpMarker++; + // // create ppc_leave instruction in segment P1 + // PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); + // imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; + // imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; + // imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; + // imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + // // create cycle-based conditional instruction in segment P0 + // PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); + // imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + // imlSegmentP0->imlList[0].operation = 0; + // imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; + // imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + // // jump instruction for PEntry + // PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); + // PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, 
imlSegmentPEntry->imlList.data() + 0); + + // // skip the newly created segments + // s += 2; + //} } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index ccb0fc83..7b4b94fb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -3,63 +3,67 @@ IMLSegment* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) { - for(IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) - { - return segIt; - } - } - debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); + __debugbreak(); return nullptr; + //for(IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) + // { + // return segIt; + // } + //} + //debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); + //return nullptr; } void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) { - size_t segCount = ppcImlGenContext->segmentList2.size(); - for(size_t s=0; ssegmentList2[s]; + __debugbreak(); // outdated - bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); - IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; - // handle empty segment - if( imlSegment->imlList.empty()) - { - if (isLastSegment == false) - IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment - else - imlSegment->nextSegmentIsUncertain = true; - continue; - } - // check last instruction of segment - IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); - if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // find destination segment by ppc jump address - IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); - if( jumpDestSegment ) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); - } - else - { - imlSegment->nextSegmentIsUncertain = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - // currently we assume that the next segment is unknown for all macros - imlSegment->nextSegmentIsUncertain = true; - } - else - { - // all other instruction types do not branch - IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - } - } + //size_t segCount = ppcImlGenContext->segmentList2.size(); + //for(size_t s=0; ssegmentList2[s]; + + // bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); + // IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; + // // handle empty segment + // if( imlSegment->imlList.empty()) + // { + // if (isLastSegment == false) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment + // else + // imlSegment->nextSegmentIsUncertain = true; + // continue; + // } + // // check last instruction of segment + // 
IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1);
+	//	if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
+	//	{
+	//		// find destination segment by ppc jump address
+	//		IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress);
+	//		if( jumpDestSegment )
+	//		{
+	//			if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
+	//				IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment);
+	//			IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment);
+	//		}
+	//		else
+	//		{
+	//			imlSegment->nextSegmentIsUncertain = true;
+	//		}
+	//	}
+	//	else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO )
+	//	{
+	//		// currently we assume that the next segment is unknown for all macros
+	//		imlSegment->nextSegmentIsUncertain = true;
+	//	}
+	//	else
+	//	{
+	//		// all other instruction types do not branch
+	//		IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment);
+	//	}
+	//}
 }
 
 void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext)
diff --git a/src/Cemu/Logging/CemuLogging.h b/src/Cemu/Logging/CemuLogging.h
index 5b2e5fa4..d729d364 100644
--- a/src/Cemu/Logging/CemuLogging.h
+++ b/src/Cemu/Logging/CemuLogging.h
@@ -39,7 +39,6 @@ enum class LogType : sint32
 	NN_SL = 26,
 	TextureReadback = 29,
-	ProcUi = 39,
 	nlibcurl = 41,
@@ -47,6 +46,7 @@ enum class LogType : sint32
 	NFC = 41,
 	NTAG = 42,
+	Recompiler = 60,
 };
 
 template <>