From c4fb7b74f8fdcbd47a7387e96740973d3a0f40c3 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 13 Dec 2022 05:41:26 +0100 Subject: [PATCH] PPCRec: Make LSWI/STWSI more generic + GPR temporaries storage --- .../Recompiler/BackendX64/BackendX64.cpp | 91 +----- .../Espresso/Recompiler/IML/IMLInstruction.h | 99 ++++-- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 2 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 2 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 1 - .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 3 - .../Recompiler/PPCRecompilerImlGen.cpp | 282 +++++------------- 7 files changed, 161 insertions(+), 319 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 41bbbd58..bcdda78e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -415,7 +415,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p // todo: Optimize by using only MOVZX/MOVSX if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // todo: Use sign extend move from memory instead of separate sign-extend? if( signExtend ) x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); else @@ -440,28 +439,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p // LWARX instruction costs extra cycles (this speeds up busy loops) x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( switchEndian == false ) - assert_dbg(); - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move) - if(g_CPUFeatures.x86.movbe) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } - x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00); - } else return false; return true; @@ -599,36 +576,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes .. - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. as big-endian - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP); - - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } else return false; return true; @@ -1943,40 +1890,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { - if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_conditionalJump(): Failed on deprecated jump method\n"); - return false; - } - if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) { // jump always - if (imlInstruction->op_conditionalJump.jumpAccordingToSegment) - { - // jump to segment - if (imlSegment->nextSegmentBranchTaken == nullptr) - assert_dbg(); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); - x64Gen_jmp_imm32(x64GenContext, 0); - } - else - { - // deprecated (jump to jumpmark) - __debugbreak(); // deprecated - //PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - //x64Gen_jmp_imm32(x64GenContext, 0); - } + cemu_assert_debug(imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + x64Gen_jmp_imm32(x64GenContext, 0); } else { - if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) - { - debug_printf("Unsupported deprecated cjump to ppc address\n"); - return false; - } cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - // generate jump update marker if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) { @@ -2159,6 +2082,10 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, else assert_dbg(); } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + } else assert_dbg(); } @@ -2187,6 +2114,10 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, else assert_dbg(); } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + } else assert_dbg(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 871e7966..89f14af4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -163,24 +163,18 @@ enum enum { PPCREC_NAME_NONE, - PPCREC_NAME_TEMPORARY, - PPCREC_NAME_R0 = 1000, - PPCREC_NAME_SPR0 = 2000, - PPCREC_NAME_FPR0 = 3000, - PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7 + PPCREC_NAME_TEMPORARY = 1000, + PPCREC_NAME_R0 = 2000, + PPCREC_NAME_SPR0 = 3000, + PPCREC_NAME_FPR0 = 4000, + PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 }; // special cases for LOAD/STORE #define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) #define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) -#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1 -#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2 -#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3 -#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1 -#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2 -#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3 -#define PPC_REC_INVALID_REGISTER 0xFF +#define PPC_REC_INVALID_REGISTER 0xFF #define PPCREC_CR_BIT_LT 0 #define PPCREC_CR_BIT_GT 1 @@ -312,7 +306,6 @@ struct IMLInstruction }op_macro; struct { - bool jumpAccordingToSegment; //IMLSegment* destinationSegment; // if set, this replaces jumpmarkAddress uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup) uint8 crRegisterIndex; uint8 crBitIndex; @@ -413,18 +406,82 @@ struct IMLInstruction void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) { - type = PPCREC_IML_TYPE_MACRO; - operation = macroId; - op_macro.param = param; - op_macro.param2 = param2; - op_macro.paramU16 = paramU16; + this->type = PPCREC_IML_TYPE_MACRO; + this->operation = macroId; + this->op_macro.param = param; + this->op_macro.param2 = param2; + this->op_macro.paramU16 = paramU16; } void make_cjump_cycle_check() { - type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - operation = 0; - crRegister = PPC_REC_INVALID_REGISTER; + this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + this->operation = 0; + this->crRegister = PPC_REC_INVALID_REGISTER; + } + + + void make_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + { + // operation with two register operands (e.g. "t0 = t1") + this->type = PPCREC_IML_TYPE_R_R; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_r.registerResult = registerResult; + this->op_r_r.registerA = registerA; + } + + void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + { + // operation with three register operands (e.g. "t0 = t1 + t4") + this->type = PPCREC_IML_TYPE_R_R_R; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_r_r.registerResult = registerResult; + this->op_r_r_r.registerA = registerA; + this->op_r_r_r.registerB = registerB; + } + + void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) + { + // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234") + this->type = PPCREC_IML_TYPE_R_R_S32; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_r_s32.registerResult = registerResult; + this->op_r_r_s32.registerA = registerA; + this->op_r_r_s32.immS32 = immS32; + } + + // load from memory + void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_LOAD; + this->operation = 0; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->op_storeLoad.registerData = registerDestination; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.copyWidth = copyWidth; + this->op_storeLoad.flags2.swapEndian = switchEndian; + this->op_storeLoad.flags2.signExtend = signExtend; + } + + // store to memory + void make_memory_r(uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_STORE; + this->operation = 0; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->op_storeLoad.registerData = registerSource; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.copyWidth = copyWidth; + this->op_storeLoad.flags2.swapEndian = switchEndian; + this->op_storeLoad.flags2.signExtend = false; } void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index a0b2a517..a09d4bab 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -1211,7 +1211,7 @@ void _reorderConditionModifyInstructions(IMLSegment* imlSegment) #endif IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); - PPCRecompilerImlGen_generateNewInstruction_noOp(nullptr, imlSegment->imlList.data() + crSetterInstructionIndex); + imlSegment->imlList[crSetterInstructionIndex].make_no_op(); } /* diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 6b35239a..1dd1f7ba 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -1203,7 +1203,7 @@ void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLS { if (routeDepth >= 64) { - forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress); + forceLogDebug_printf("Recompiler RA route maximum depth exceeded\n"); return; } route[routeDepth] = currentSegment; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 10cd0aa0..bd2c02d8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -32,7 +32,6 @@ struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct pp struct ppcImlGenContext_t { class PPCFunctionBoundaryTracker* boundaryTracker; - PPCRecFunction_t* functionRef; uint32* currentInstruction; uint32 ppcAddressOfCurrentInstruction; IMLSegment* currentOutputSegment; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 0521c440..4843fd1c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -26,7 +26,6 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress); void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode); @@ -37,8 +36,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe // IML instruction generation (new style, can generate new instructions but also overwrite existing ones) -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); - void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); // IML generation - FPU diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index e3c818a6..40844bb2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -50,24 +50,9 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) { - //if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) - //{ - // sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; - // ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); - // ppcImlGenContext->imlListSize = newSize; - //} - //IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; - //memset(imlInstruction, 0x00, sizeof(IMLInstruction)); - //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default - //imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; - //ppcImlGenContext->imlListCount++; - //return imlInstruction; - IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); memset(&inst, 0x00, sizeof(IMLInstruction)); inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default -//imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; - return &inst; } @@ -86,28 +71,12 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGe void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) { - // operation with three register operands (e.g. "t0 = t1 + t4") - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R_R; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r_r.registerResult = registerResult; - imlInstruction->op_r_r_r.registerA = registerA; - imlInstruction->op_r_r_r.registerB = registerB; + ppcImlGenContext->emitInst().make_r_r_r(operation, registerResult, registerA, registerB, crRegister, crMode); } void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) { - // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234") - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R_S32; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r_s32.registerResult = registerResult; - imlInstruction->op_r_r_s32.registerA = registerA; - imlInstruction->op_r_r_s32.immS32 = immS32; + ppcImlGenContext->emitInst().make_r_r_s32(operation, registerResult, registerA, immS32, crRegister, crMode); } void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name) @@ -153,25 +122,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte } -void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress) -{ - __debugbreak(); - - //// jump - //if (imlInstruction == NULL) - // imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - //else - // memset(imlInstruction, 0, sizeof(IMLInstruction)); - //imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - //imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - //imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; - //imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - //imlInstruction->op_conditionalJump.crRegisterIndex = 0; - //imlInstruction->op_conditionalJump.crBitIndex = 0; - //imlInstruction->op_conditionalJump.bitMustBeSet = false; -} - // jump based on segment branches void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) { @@ -180,18 +130,22 @@ void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; imlInstruction->op_conditionalJump.crRegisterIndex = 0; imlInstruction->op_conditionalJump.crBitIndex = 0; imlInstruction->op_conditionalJump.bitMustBeSet = false; } -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) +void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->make_no_op(); + // conditional jump + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + imlInstruction->type = PPCREC_IML_TYPE_CJUMP; + imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.condition = jumpCondition; + imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; + imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; + imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; } void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB) @@ -209,48 +163,9 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen imlInstruction->op_cr.crB = crB; } -void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - __debugbreak(); - - //// conditional jump - //IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - //imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - //imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; - //imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - //imlInstruction->op_conditionalJump.condition = jumpCondition; - //imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - //imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - //imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - -void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - // conditional jump - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; - imlInstruction->op_conditionalJump.condition = jumpCondition; - imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { - // load from memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_LOAD; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_storeLoad.registerData = registerDestination; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.copyWidth = copyWidth; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; - imlInstruction->op_storeLoad.flags2.signExtend = signExtend; + ppcImlGenContext->emitInst().make_r_memory(registerDestination, registerMemory, immS32, copyWidth, signExtend, switchEndian); } void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) @@ -270,17 +185,7 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) { - // load from memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_STORE; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_storeLoad.registerData = registerSource; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.copyWidth = copyWidth; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; - imlInstruction->op_storeLoad.flags2.signExtend = false; + ppcImlGenContext->emitInst().make_memory_r(registerSource, registerMemory, immS32, copyWidth, switchEndian); } void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) @@ -630,10 +535,11 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin } } // add range - ppcRecRange_t recRange; - recRange.ppcAddress = startAddress; - recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR - ppcImlGenContext->functionRef->list_ranges.push_back(recRange); + cemu_assert_unimplemented(); + //ppcRecRange_t recRange; + //recRange.ppcAddress = startAddress; + //recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR + //ppcImlGenContext->functionRef->list_ranges.push_back(recRange); } bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -653,14 +559,9 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) } // is jump destination within recompiled function? if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) - { - // jump to target within same function PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); - } else - { ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - } return true; } @@ -778,10 +679,6 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) // far jump debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet"); return false; - - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - //ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } } } @@ -2190,70 +2087,40 @@ bool PPCRecompilerImlGen_LSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, rD, rA, nb); if( nb == 0 ) nb = 32; - if( nb == 4 ) + + if (rA == 0) { - // if nb == 4 this instruction immitates LWZ - if( rA == 0 ) - { -#ifdef CEMU_DEBUG_ASSERT - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif - return false; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true); - return true; + cemu_assert_unimplemented(); // special form where gpr is ignored and EA is 0 + return false; } - else if( nb == 2 ) + + // potential optimization: On x86 unaligned access is allowed and we could handle the case nb==4 with a single memory read, and nb==2 with a memory read and shift + + uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 memOffset = 0; + while (nb > 0) { - // if nb == 2 this instruction immitates a LHZ but the result is shifted left by 16 bits - if( rA == 0 ) - { -#ifdef CEMU_DEBUG_ASSERT - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif + if (rD == rA) return false; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, true); - // shift - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, destinationRegister, destinationRegister, 16); - return true; - } - else if( nb == 3 ) - { - // if nb == 3 this instruction loads a 3-byte big-endian and the result is shifted left by 8 bits - if( rA == 0 ) + cemu_assert(rD < 32); + uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + // load bytes one-by-one + for (sint32 b = 0; b < 4; b++) { -#ifdef CEMU_DEBUG_ASSERT - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif - return false; + ppcImlGenContext->emitInst().make_r_memory(tmpReg, memReg, memOffset + b, 8, false, false); + sint32 shiftAmount = (3 - b) * 8; + if(shiftAmount) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, tmpReg, tmpReg, shiftAmount); + ppcImlGenContext->emitInst().make_r_r(b == 0 ? PPCREC_IML_OP_ASSIGN : PPCREC_IML_OP_OR, destinationRegister, tmpReg); + nb--; + if (nb == 0) + break; } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, PPC_REC_STORE_LSWI_3, false, true); - return true; + memOffset += 4; + rD++; } - debug_printf("PPCRecompilerImlGen_LSWI(): Unsupported nb value %d\n", nb); - return false; + return true; } bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2262,38 +2129,32 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_X(opcode, rS, rA, nb); if( nb == 0 ) nb = 32; - if( nb == 4 ) + + uint32 memReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 tmpReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 memOffset = 0; + while (nb > 0) { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, 32, true); - return true; + if (rS == rA) + return false; + cemu_assert(rS < 32); + uint32 dataRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + // store bytes one-by-one + for (sint32 b = 0; b < 4; b++) + { + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpReg, dataRegister); + sint32 shiftAmount = (3 - b) * 8; + if (shiftAmount) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, tmpReg, tmpReg, shiftAmount); + ppcImlGenContext->emitInst().make_memory_r(tmpReg, memReg, memOffset + b, 8, false); + nb--; + if (nb == 0) + break; + } + memOffset += 4; + rS++; } - else if( nb == 2 ) - { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store half-word (shifted << 16) - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_2, false); - return true; - } - else if( nb == 3 ) - { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store 3-byte-word (shifted << 8) - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_3, false); - return true; - } - debug_printf("PPCRecompilerImlGen_STSWI(): Unsupported nb value %d\n", nb); - return false; + return true; } bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -4293,9 +4154,7 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) { - ppcImlGenContext.functionRef = ppcRecFunc; // todo - remove this and replace internally with boundary tracker ppcImlGenContext.boundaryTracker = &boundaryTracker; - if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) return false; @@ -4343,8 +4202,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; - - PPCRecompilerImlGen_generateNewInstruction_noOp(&ppcImlGenContext, lastInstruction); + lastInstruction->make_no_op(); // append conditional moves based on branch condition for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++)