From d420622da721eba550c70fbc495fcb5c33c0850c Mon Sep 17 00:00:00 2001
From: Exzap <13877693+Exzap@users.noreply.github.com>
Date: Tue, 13 Dec 2022 19:19:29 +0100
Subject: [PATCH] PPCRec: Make register pool for RA configurable

---
 .../Recompiler/BackendX64/BackendX64.cpp      | 371 ++++++++----------
 .../Recompiler/BackendX64/BackendX64.h        |  10 +-
 .../Recompiler/BackendX64/BackendX64FPU.cpp   |  24 +-
 src/Cafe/HW/Espresso/Recompiler/IML/IML.h     |   3 -
 .../Espresso/Recompiler/IML/IMLOptimizer.cpp  |  98 -----
 .../Recompiler/IML/IMLRegisterAllocator.cpp   |  75 ++--
 .../Recompiler/IML/IMLRegisterAllocator.h     |  94 +++++
 .../HW/Espresso/Recompiler/PPCRecompiler.cpp  |  17 +-
 8 files changed, 330 insertions(+), 362 deletions(-)

diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
index bcdda78e..eef56497 100644
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
@@ -8,18 +8,13 @@
 #include "util/MemMapper/MemMapper.h"
 #include "Common/cpu_features.h"
 
-sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping
-{
-	REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX
-};
-
 /*
  * Remember current instruction output offset for reloc
  * The instruction generated after this method has been called will be adjusted
  */
-void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr)
+void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr)
 {
-	x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, type, extraInfo);
+	x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, extraInfo);
 }
 
 /*
@@ -121,7 +116,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
 	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
 	{
-		uint32 branchDstReg = tempToRealRegister(imlInstruction->op_macro.param);
+		uint32 branchDstReg = imlInstruction->op_macro.param;
 		if(REG_RDX != branchDstReg)
 			x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, branchDstReg);
 		// potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX
@@ -323,11 +318,11 @@ */
 bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
 {
-	sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData);
-	sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem);
+	sint32 realRegisterData = imlInstruction->op_storeLoad.registerData;
+	sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
 	sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
 	if( indexed )
-		realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2);
+		realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
 	if( indexed && realRegisterMem == realRegisterMem2 )
 	{
 		return false;
 	}
@@ -449,11 +444,11 @@ */
 bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* 
ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { - sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterData = imlInstruction->op_storeLoad.registerData; + sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if (indexed) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; if (indexed && realRegisterMem == realRegisterMem2) { @@ -588,11 +583,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // registerResult = registerA if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) { // since MOV doesn't set eflags we need another test instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -603,7 +598,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else { - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) @@ -611,24 +606,24 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // registerResult = endianSwap32(registerA) if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) assert_dbg(); - x64Gen_bswap_reg64Lower32bit(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { // registerResult += registerA PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, 
imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -645,17 +640,17 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= registerA - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= registerA - x64Gen_and_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_and_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else { // registerResult ^= registerA - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -669,15 +664,15 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // NOT destination register - x64Gen_not_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); // update cr bits if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { // NOT instruction does not update flags, so we have to generate an additional TEST instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -690,20 +685,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) { - x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 
tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerA), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerA, imlInstruction->op_r_r.registerA); sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); - x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32-1); + x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); + x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); + x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32-1); sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); } } @@ -725,7 +720,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); // create compare instruction - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // set cr bits sint32 crRegister = imlInstruction->crRegister; if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) @@ -757,10 +752,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // NEG destination register - x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); // update cr bits if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -774,12 +769,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // copy operand to result if different registers if( imlInstruction->op_r_r.registerResult != 
imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // copy xer_ca to eflags carry x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 0); + x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 0); // update xer carry x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) @@ -799,19 +794,19 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp // copy operand to result if different registers if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } // copy xer_ca to eflags carry x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), (uint32)-1); + x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1); // update xer carry x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { // set cr bits sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } } @@ -819,8 +814,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { // registerResult = ~registerOperand1 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r.registerA); + sint32 rRegResult = imlInstruction->op_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r.registerA; // copy operand to result register x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); // execute NOT on result @@ -850,12 +845,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp { // registerResult = (uint32)(sint32)(sint16)registerA PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), reg32ToReg16(tempToRealRegister(imlInstruction->op_r_r.registerA))); + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); 
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); // set cr bits PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } @@ -871,8 +866,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerResult)); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerResult); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -881,7 +876,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else { // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); for(sint32 f=0; f<0x20; f+=8) @@ -902,7 +897,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { // registerResult = immS32 cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { @@ -912,7 +907,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { assert_dbg(); } - x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { @@ -927,13 +922,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, // update cr register assert_dbg(); } - x64Gen_sub_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - 
x64Gen_and_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) @@ -951,14 +946,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, // registerResult |= immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_or_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { // registerResult ^= immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_xor_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); + x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { @@ -967,7 +962,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( (imlInstruction->op_r_immS32.immS32&0x80) ) assert_dbg(); // should not happen - x64Gen_rol_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint8)imlInstruction->op_r_immS32.immS32); + x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) { @@ -989,7 +984,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); // create compare instruction - x64Gen_cmp_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), imlInstruction->op_r_immS32.immS32); + x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32); // set cr bits uint32 crRegister = imlInstruction->crRegister; if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) @@ -1017,7 +1012,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 destRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); + uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); for(sint32 f=0; f<32; f++) { @@ -1028,7 +1023,7 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 srcRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); + uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); for (sint32 f = 0; f < 32; f++) { @@ -1066,17 +1061,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR { if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } } @@ -1084,17 +1079,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR { if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? 
X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } } @@ -1102,17 +1097,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR { if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? 
X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } } @@ -1120,9 +1115,9 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); if (imlInstruction->op_conditional_r_s32.bitMustBeSet) - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); else - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); + x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); return true; } return false; @@ -1134,9 +1129,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 + registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) @@ -1197,9 +1192,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 - registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if( rRegOperand1 == rRegOperand2 ) { // result = operand1 - operand1 -> 0 @@ -1241,9 +1236,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 - registerOperand2 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if( rRegOperand1 == rRegOperand2 ) { // copy xer_ca to eflags carry @@ -1295,9 +1290,9 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 * registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it @@ -1334,9 +1329,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { return false; } - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperandA = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperandB = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperandA = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperandB = imlInstruction->op_r_r_r.registerB; // update carry flag // carry flag is detected this way: //if ((~a+b) < a) { @@ -1402,9 +1397,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) { @@ -1455,9 +1450,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; // todo: Use BMI2 rotate if available // check if CL/ECX/RCX is available if( rRegResult != REG_RCX && rRegOperand1 != REG_RCX && rRegOperand2 != REG_RCX ) @@ -1502,9 +1497,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = 
tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; // save cr if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) { @@ -1576,9 +1571,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); @@ -1621,9 +1616,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); @@ -1669,9 +1664,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { // registerResult = registerOperand1 | ~registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; + sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; + sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); @@ -1705,8 +1700,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // registerResult = registerOperand + immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = 
tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; if( rRegResult != rRegOperand ) { @@ -1719,8 +1714,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction { // registerResult = registerOperand + immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; if( rRegResult != rRegOperand ) { @@ -1749,8 +1744,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // registerResult = immS32 - registerOperand PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32; if( rRegResult != rRegOperand ) { @@ -1799,23 +1794,23 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // save cr cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy rS to temporary register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA); // rotate destination register if( sh ) x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); // AND destination register with inverted mask - x64Gen_and_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), ~mask); + x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, ~mask); // AND temporary rS register with mask x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask); // OR result with temporary - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), REG_RESV_TEMP); + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, REG_RESV_TEMP); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { // registerResult = registerOperand * immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); + sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; + sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize if( rRegResult != rRegOperand ) @@ -1829,7 +1824,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* 
PPCRecFunction uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32; // MOV registerResult, registerOperand (if different) if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); // todo: Detect if we don't need to update carry // generic case // TEST registerResult, (1<<(SH+1))-1 @@ -1838,11 +1833,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction caTestMask = 0x7FFFFFFF; else caTestMask = (1 << (sh)) - 1; - x64Gen_test_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), caTestMask); + x64Gen_test_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, caTestMask); // SETNE/NZ [ESP+XER_CA] x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); // SAR registerResult, SH - x64Gen_sar_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), sh); + x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh); // JNS (if sign not set) sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here @@ -1854,7 +1849,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); @@ -1866,17 +1861,17 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // MOV registerResult, registerOperand (if different) if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); + x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); // Shift if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ) - x64Gen_shl_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); + x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); else - x64Gen_shr_reg64Low32_imm8(x64GenContext, 
tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); + x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); // CR update if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { // since SHL/SHR only modifies the OF flag we need another TEST reg,reg here - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } } @@ -1894,7 +1889,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { // jump always cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmp_imm32(x64GenContext, 0); } else @@ -1904,7 +1899,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) { // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); sint32 condition = imlInstruction->op_conditionalJump.condition; if( condition == PPCREC_JUMP_CONDITION_E ) x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); @@ -1922,19 +1917,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); return true; } @@ -1943,19 +1938,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } @@ -1964,19 +1959,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } @@ -1985,7 +1980,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec } x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); cemu_assert_debug(imlSegment->GetBranchTaken()); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, (void*)imlSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken()); if( imlInstruction->op_conditionalJump.bitMustBeSet ) { x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); @@ -2009,7 +2004,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction // BT x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, x64GenContext->currentSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken()); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); return true; } @@ -2063,28 +2058,28 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { sint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, memOffset); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, memOffset); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + 
sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); } else assert_dbg(); @@ -2095,28 +2090,28 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.registerIndex); } else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) { uint32 sprIndex = (name - PPCREC_NAME_SPR0); if (sprIndex == SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.registerIndex); else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.registerIndex); else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.registerIndex); else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) { sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, imlInstruction->op_r_name.registerIndex); } else assert_dbg(); } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); } else assert_dbg(); @@ -2338,70 +2333,44 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo // fix relocs for(auto& relocIt : x64GenContext.relocateOffsetTable2) { - if(relocIt.type == X64_RELOC_LINK_TO_PPC || relocIt.type == X64_RELOC_LINK_TO_SEGMENT) + // search for segment that starts with this offset + uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo; + uint32 x64Offset = 0xFFFFFFFF; + + IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; + x64Offset = destSegment->x64Offset; + + uint32 relocBase = relocIt.offset; + uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; + if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) { - // if link to PPC, search for segment that starts with this offset - uint32 
ppcOffset = (uint32)(size_t)relocIt.extraInfo; - uint32 x64Offset = 0xFFFFFFFF; - if (relocIt.type == X64_RELOC_LINK_TO_PPC) + // Jcc relativeImm32 + sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2)); + if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled { - cemu_assert_suspicious(); - //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - //{ - // if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) - // { - // x64Offset = segIt->x64Offset; - // break; - // } - //} - //if (x64Offset == 0xFFFFFFFF) - //{ - // debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); - // // todo: Cleanup - // return false; - //} + // convert to near Jcc + *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70); + // patch offset + *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump; + // replace unused 4 bytes with NOP instruction + relocInstruction[2] = 0x0F; + relocInstruction[3] = 0x1F; + relocInstruction[4] = 0x40; + relocInstruction[5] = 0x00; } else { - IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; - x64Offset = destSegment->x64Offset; + // patch offset + *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6)); } - uint32 relocBase = relocIt.offset; - uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; - if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) - { - // Jcc relativeImm32 - sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2)); - if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled - { - // convert to near Jcc - *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70); - // patch offset - *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump; - // replace unused 4 bytes with NOP instruction - relocInstruction[2] = 0x0F; - relocInstruction[3] = 0x1F; - relocInstruction[4] = 0x40; - relocInstruction[5] = 0x00; - } - else - { - // patch offset - *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6)); - } - } - else if( relocInstruction[0] == 0xE9 ) - { - // JMP relativeImm32 - *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5)); - } - else - assert_dbg(); + } + else if( relocInstruction[0] == 0xE9 ) + { + // JMP relativeImm32 + *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5)); } else - { assert_dbg(); - } } // copy code to executable memory diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 5a2b7500..347f2ea1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -3,10 +3,9 @@ struct x64RelocEntry_t { - x64RelocEntry_t(uint32 offset, uint8 type, void* extraInfo) : offset(offset), type(type), extraInfo(extraInfo) {}; + x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {}; uint32 offset; - uint8 type; void* extraInfo; }; @@ -97,10 +96,6 @@ struct x64GenContext_t #define REG_RESV_FPR_TEMP (15) -extern sint32 x64Gen_registerMap[12]; - -#define tempToRealRegister(__x) (x64Gen_registerMap[__x]) -#define tempToRealFPRRegister(__x) (__x) #define reg32ToReg16(__x) (__x) enum @@ -128,9 +123,6 @@ enum #define 
PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) #define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) -#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset -#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment - #define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index b70a9a31..5bb2505d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -10,11 +10,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -27,11 +27,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunct uint32 name = imlInstruction->op_r_name.name; if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); } else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); + x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); } else { @@ -268,11 +268,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterXMM = 
imlInstruction->op_storeLoad.registerData; + sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; uint8 mode = imlInstruction->op_storeLoad.mode; if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ) @@ -384,7 +384,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 || mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) { - PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); + PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); } else { @@ -579,11 +579,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData; + sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2; uint8 mode = imlInstruction->op_storeLoad.mode; if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 ) { @@ -670,7 +670,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 || mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) { - PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); + PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 72a2d3f5..3dcd50b6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -24,9 +24,6 @@ void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcIml void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext); -// register allocator -void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext); - // debug void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct 
IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index a09d4bab..ae3c6c79 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -6,104 +6,6 @@ #include "../PPCRecompilerIml.h" #include "../BackendX64/BackendX64.h" -struct replacedRegisterTracker_t -{ - struct - { - sint32 instructionIndex; - sint32 registerPreviousName; - sint32 registerNewName; - sint32 index; // new index - sint32 previousIndex; // previous index (always out of range) - bool nameMustBeMaintained; // must be stored before replacement and loaded after replacement ends - }replacedRegisterEntry[PPC_X64_GPR_USABLE_REGISTERS]; - sint32 count; -}; - -bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed) -{ - IMLUsedRegisters registersUsed; - imlSegment->imlList[imlIndexStart].CheckRegisterUsage(®istersUsed); - // mask all registers used by this instruction - uint32 instructionReservedRegisterMask = 0; - if( registersUsed.readNamedReg1 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg1)); - if( registersUsed.readNamedReg2 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg2)); - if( registersUsed.readNamedReg3 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg3)); - if( registersUsed.writtenNamedReg1 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.writtenNamedReg1)); - // mask all registers that are reserved for other replacements - uint32 replacementReservedRegisterMask = 0; - for(sint32 i=0; icount; i++) - { - replacementReservedRegisterMask |= (1<replacedRegisterEntry[i].index); - } - - // potential improvement: Scan ahead a few instructions and look for registers that are the least used (or ideally never used) - - // pick available register - const uint32 allRegisterMask = (1<<(PPC_X64_GPR_USABLE_REGISTERS+1))-1; // mask with set bit for every register - uint32 reservedRegisterMask = instructionReservedRegisterMask | replacementReservedRegisterMask; - cemu_assert(instructionReservedRegisterMask != allRegisterMask); // no usable register! 
(Need to store a register from the replacedRegisterTracker) - sint32 usedRegisterIndex = -1; - for(sint32 i=0; imappedRegister[i] != -1 ) - { - // register is reserved by segment -> In use - *isUsed = true; - *registerName = ppcImlGenContext->mappedRegister[i]; - } - else - { - *isUsed = false; - *registerName = -1; - } - *registerIndex = i; - return true; - } - } - return false; - -} - -void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, replacedRegisterTracker_t* replacedRegisterTracker, sint32 registerTrackerIndex, sint32* imlIndex) -{ - // store register - sint32 imlIndexEdit = *imlIndex; - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - // name_unusedRegister = unusedRegister - IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit + 0]; - memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr.type = PPCREC_IML_TYPE_NAME_R; - imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerNewName; - imlIndexEdit++; - // load new register if required - if( replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].nameMustBeMaintained ) - { - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit]; - memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction)); - imlInstructionItr.type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerPreviousName;//ppcImlGenContext->mappedRegister[replacedRegisterTracker.replacedRegisterEntry[i].index]; - imlIndexEdit += 1; - } - // move last entry to current one - memcpy(replacedRegisterTracker->replacedRegisterEntry+registerTrackerIndex, replacedRegisterTracker->replacedRegisterEntry+replacedRegisterTracker->count-1, sizeof(replacedRegisterTracker->replacedRegisterEntry[0])); - replacedRegisterTracker->count--; - *imlIndex = imlIndexEdit; -} - bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) { // only xmm0 to xmm14 may be used, xmm15 is reserved diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 1dd1f7ba..1b720d26 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -2,10 +2,16 @@ #include "../PPCRecompiler.h" #include "../PPCRecompilerIml.h" +#include "IMLRegisterAllocator.h" #include "IMLRegisterAllocatorRanges.h" #include "../BackendX64/BackendX64.h" +struct IMLRegisterAllocatorContext +{ + IMLRegisterAllocatorParameters* raParam; +}; + uint32 recRACurrentIterationIndex = 0; uint32 PPCRecRA_getNextIterationIndex() @@ -212,10 +218,10 @@ typedef struct sint32 liveRangesCount; }raLiveRangeInfo_t; -// return a bitmask that contains only registers that are not used by any colliding range -uint32 
PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range)
+// mark occupied registers by any overlapping range as unavailable in physRegSet
+void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet)
 {
-	uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1;
+	//uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1;
 	for (auto& subrange : range->list_subranges)
 	{
 		IMLSegment* imlSegment = subrange->imlSegment;
@@ -233,14 +239,13 @@ uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range)
 				(subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) ||
 				(subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) )
 			{
-				if(subrangeItr->range->physicalRegister >= 0)
-					physRegisterMask &= ~(1<<(subrangeItr->range->physicalRegister));
+				if (subrangeItr->range->physicalRegister >= 0)
+					physRegSet.SetReserved(subrangeItr->range->physicalRegister);
 			}
 			// next
 			subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
 		}
 	}
-	return physRegisterMask;
 }
 
 bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; }
@@ -326,7 +331,7 @@ void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegm
 	// todo
 }
 
-bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
+bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
 {
 	// sort subranges ascending by start index
 	_sortSegmentAllSubrangesLinkedList(imlSegment);
@@ -380,24 +385,22 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
 			continue;
 		}
 		// find free register for this segment
-		uint32 physRegisterMask = (1<<PPC_X64_GPR_USABLE_REGISTERS)-1;
+		IMLPhysRegisterSet physRegSet = ctx.raParam->physicalRegisterPool;
+
 		for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
 		{
 			raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f];
-			if (liverange->range->physicalRegister < 0)
-				assert_dbg();
-			physRegisterMask &= ~(1<<liverange->range->physicalRegister);
+			cemu_assert_debug(liverange->range->physicalRegister >= 0);
+			physRegSet.SetReserved(liverange->range->physicalRegister);
 		}
 		// check intersections with other ranges and determine allowed registers
-		uint32 allowedPhysRegisterMask = 0;
-		uint32 unusedRegisterMask = physRegisterMask; // mask of registers that are currently not used (does not include range checks)
-		if (physRegisterMask != 0)
+		IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
+		if(physRegSet.HasAnyAvailable())
 		{
-			// check globally
-			allowedPhysRegisterMask = PPCRecRA_getAllowedRegisterMaskForFullRange(subrangeItr->range);
-			physRegisterMask &= allowedPhysRegisterMask;
+			// check globally in all segments
+			PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr->range, physRegSet);
 		}
-		if (physRegisterMask == 0)
+		if (!physRegSet.HasAnyAvailable())
 		{
 			struct
 			{
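
With this change the candidate set no longer starts from the hard-coded (1<<PPC_X64_GPR_USABLE_REGISTERS)-1 mask but from the backend-supplied pool; registers held by locally live ranges and by globally overlapping ranges are then marked reserved before a register is picked. A minimal sketch of that selection order, assuming only the IMLPhysRegisterSet interface added further down in this patch (chooseHostRegister, its taken-register lists, and the sint32/uint32 typedefs are illustrative assumptions, not part of the diff):

#include <vector>

// illustrative sketch only: reserve everything that is already taken, then pick the first free register
sint32 chooseHostRegister(IMLPhysRegisterSet candidates,            // copy of raParam.physicalRegisterPool
                          const std::vector<sint32>& takenLocally,  // held by ranges live at this point
                          const std::vector<sint32>& takenGlobally) // held by overlapping ranges in other segments
{
	for (sint32 reg : takenLocally)
		candidates.SetReserved((uint32)reg);
	if (!candidates.HasAnyAvailable())
		return -1; // caller falls back to one of the spill strategies
	for (sint32 reg : takenGlobally)
		candidates.SetReserved((uint32)reg);
	if (!candidates.HasAnyAvailable())
		return -1;
	return (sint32)candidates.GetFirstAvailableReg();
}
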
@@ -480,14 +483,16 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
 			spillStrategies.availableRegisterHole.physRegister = -1;
 			if (currentIndex >= 0)
 			{
-				if (unusedRegisterMask != 0)
+				if (localAvailableRegsMask.HasAnyAvailable())
 				{
-					for (sint32 t = 0; t < PPC_X64_GPR_USABLE_REGISTERS; t++)
+					sint32 physRegItr = -1;
+					while (true)
 					{
-						if ((unusedRegisterMask&(1 << t)) == 0)
-							continue;
+						physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1);
+						if (physRegItr < 0)
+							break;
 						// get size of potential hole for this register
-						sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, t);
+						sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, physRegItr);
 						if (distance < 2)
 							continue; // not worth consideration
 						// calculate additional cost due to split
@@ -500,7 +505,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
 						{
 							spillStrategies.availableRegisterHole.cost = cost;
 							spillStrategies.availableRegisterHole.distance = distance;
-							spillStrategies.availableRegisterHole.physRegister = t;
+							spillStrategies.availableRegisterHole.physRegister = physRegItr;
 						}
 					}
 				}
@@ -611,16 +616,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
 			return false;
 		}
 		// assign register to range
-		sint32 registerIndex = -1;
-		for (sint32 f = 0; f < PPC_X64_GPR_USABLE_REGISTERS; f++)
-		{
-			if ((physRegisterMask&(1 << f)) != 0)
-			{
-				registerIndex = f;
-				break;
-			}
-		}
-		subrangeItr->range->physicalRegister = registerIndex;
+		subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg();
 		// add to live ranges
 		liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
 		liveInfo.liveRangesCount++;
@@ -630,7 +626,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
 	return true;
 }
 
-void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext)
+void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext)
 {
 	// start with frequently executed segments first
 	sint32 maxLoopDepth = 0;
@@ -647,7 +643,7 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext)
 		{
 			if (segIt->loopDepth != d)
 				continue;
-			done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, segIt);
+			done = PPCRecRA_assignSegmentRegisters(ctx, ppcImlGenContext, segIt);
 			if (done == false)
 				break;
 		}
@@ -997,8 +993,11 @@ void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGen
 	}
 }
 
-void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext)
+void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
 {
+	IMLRegisterAllocatorContext ctx;
+	ctx.raParam = &raParam;
+
 	PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext);
 
 	ppcImlGenContext->raInfo.list_ranges = std::vector<raLivenessRange_t*>();
@@ -1006,7 +1005,7 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext
 	PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext);
 	PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext);
 
-	PPCRecRA_assignRegisters(ppcImlGenContext);
+	PPCRecRA_assignRegisters(ctx, ppcImlGenContext);
 
 	PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext);
 	PPCRecRA_generateMoveInstructions(ppcImlGenContext);
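
The new header below stores the register pool as a 64-bit mask and locates free registers with a byte-wise, then bit-wise scan. A standalone sketch of the same scan with a couple of sanity checks; findLowestSetBit is a stand-in name and not part of the diff:

#include <cassert>
#include <cstdint>

// same idea as GetFirstAvailableReg below: skip zero bytes first, then zero bits
static uint32_t findLowestSetBit(uint64_t mask)
{
	assert(mask != 0); // caller must check HasAnyAvailable() first
	uint32_t index = 0;
	while ((mask & 0xFF) == 0) { index += 8; mask >>= 8; }
	while ((mask & 0x1) == 0) { index += 1; mask >>= 1; }
	return index;
}

int main()
{
	assert(findLowestSetBit(0x1ull) == 0);
	assert(findLowestSetBit(0xA0ull) == 5);                // 0b10100000
	assert(findLowestSetBit(0x8000000000000000ull) == 63);
	return 0;
}
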
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
index e69de29b..87e36b00 100644
--- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
@@ -0,0 +1,94 @@
+
+// container for storing a set of register indices
+// specifically optimized towards storing physical register indices (expected to be below 64)
+class IMLPhysRegisterSet
+{
+public:
+	void SetAvailable(uint32 index)
+	{
+		cemu_assert_debug(index < 64);
+		m_regBitmask |= ((uint64)1 << index);
+	}
+
+	void SetReserved(uint32 index)
+	{
+		cemu_assert_debug(index < 64);
+		m_regBitmask &= ~((uint64)1 << index);
+	}
+
+	bool IsAvailable(uint32 index) const
+	{
+		return (m_regBitmask & ((uint64)1 << index)) != 0;
+	}
+
+	IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
+	{
+		this->m_regBitmask &= other.m_regBitmask;
+		return *this;
+	}
+
+	IMLPhysRegisterSet& operator=(const IMLPhysRegisterSet& other)
+	{
+		this->m_regBitmask = other.m_regBitmask;
+		return *this;
+	}
+
+	bool HasAnyAvailable() const
+	{
+		return m_regBitmask != 0;
+	}
+
+	// returns index of first available register. Do not call when HasAnyAvailable() == false
+	uint32 GetFirstAvailableReg()
+	{
+		cemu_assert_debug(m_regBitmask != 0);
+		uint32 regIndex = 0;
+		auto tmp = m_regBitmask;
+		while ((tmp & 0xFF) == 0)
+		{
+			regIndex += 8;
+			tmp >>= 8;
+		}
+		while ((tmp & 0x1) == 0)
+		{
+			regIndex++;
+			tmp >>= 1;
+		}
+		return regIndex;
+	}
+
+	// returns index of next available register (search includes any register index >= startIndex)
+	// returns -1 if there is no more register
+	sint32 GetNextAvailableReg(sint32 startIndex)
+	{
+		if (startIndex >= 64)
+			return -1;
+		uint32 regIndex = startIndex;
+		auto tmp = m_regBitmask;
+		tmp >>= regIndex;
+		if (!tmp)
+			return -1;
+		while ((tmp & 0xFF) == 0)
+		{
+			regIndex += 8;
+			tmp >>= 8;
+		}
+		while ((tmp & 0x1) == 0)
+		{
+			regIndex++;
+			tmp >>= 1;
+		}
+		return regIndex;
+	}
+
+private:
+	uint64 m_regBitmask{ 0 };
+};
+
+
+struct IMLRegisterAllocatorParameters
+{
+	IMLPhysRegisterSet physicalRegisterPool;
+};
+
+void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
index 2565e3ee..838b61f5 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
@@ -14,6 +14,7 @@
 #include "util/MemMapper/MemMapper.h"
 
 #include "IML/IML.h"
+#include "IML/IMLRegisterAllocator.h"
 #include "BackendX64/BackendX64.h"
 
 struct PPCInvalidationRange
@@ -272,7 +273,21 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
 		}
 	}
 
-	IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext);
+	IMLRegisterAllocatorParameters raParam;
+	raParam.physicalRegisterPool.SetAvailable(REG_RAX);
+	raParam.physicalRegisterPool.SetAvailable(REG_RDX);
+	raParam.physicalRegisterPool.SetAvailable(REG_RBX);
+	raParam.physicalRegisterPool.SetAvailable(REG_RBP);
+	raParam.physicalRegisterPool.SetAvailable(REG_RSI);
+	raParam.physicalRegisterPool.SetAvailable(REG_RDI);
+	raParam.physicalRegisterPool.SetAvailable(REG_R8);
+	raParam.physicalRegisterPool.SetAvailable(REG_R9);
+	raParam.physicalRegisterPool.SetAvailable(REG_R10);
+	raParam.physicalRegisterPool.SetAvailable(REG_R11);
+	raParam.physicalRegisterPool.SetAvailable(REG_R12);
+	raParam.physicalRegisterPool.SetAvailable(REG_RCX);
+
+	IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
 
 	// remove redundant name load and store instructions
 	PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
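
Because the usable pool is now plain data instead of the removed x64Gen_registerMap table, a backend can withhold any host register it needs for itself (REG_RSP, for example, is used throughout the emitter above as the PPCInterpreter_t base pointer) simply by never adding it to the pool. A hedged convenience sketch; makeRAParams is illustrative and not part of the diff:

#include <initializer_list>

// build an RA parameter block from whichever host registers the backend can spare
static IMLRegisterAllocatorParameters makeRAParams(std::initializer_list<uint32> usableHostRegs)
{
	IMLRegisterAllocatorParameters raParam;
	for (uint32 reg : usableHostRegs)
		raParam.physicalRegisterPool.SetAvailable(reg);
	return raParam;
}

// usage, mirroring the call site above:
//   auto raParam = makeRAParams({ REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI,
//                                 REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX });
//   IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
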