PPCRec: Make register pool for RA configurable

This commit is contained in:
Exzap 2022-12-13 19:19:29 +01:00
parent c4fb7b74f8
commit d420622da7
8 changed files with 330 additions and 362 deletions

View file

@ -8,18 +8,13 @@
#include "util/MemMapper/MemMapper.h" #include "util/MemMapper/MemMapper.h"
#include "Common/cpu_features.h" #include "Common/cpu_features.h"
sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping
{
REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX
};
/* /*
* Remember current instruction output offset for reloc * Remember current instruction output offset for reloc
* The instruction generated after this method has been called will be adjusted * The instruction generated after this method has been called will be adjusted
*/ */
void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr) void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr)
{ {
x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, type, extraInfo); x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, extraInfo);
} }
/* /*
@ -121,7 +116,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
{ {
uint32 branchDstReg = tempToRealRegister(imlInstruction->op_macro.param); uint32 branchDstReg = imlInstruction->op_macro.param;
if(REG_RDX != branchDstReg) if(REG_RDX != branchDstReg)
x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, branchDstReg); x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, branchDstReg);
// potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX
@ -323,11 +318,11 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
*/ */
bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{ {
sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); sint32 realRegisterData = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if( indexed ) if( indexed )
realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
if( indexed && realRegisterMem == realRegisterMem2 ) if( indexed && realRegisterMem == realRegisterMem2 )
{ {
return false; return false;
@ -449,11 +444,11 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
*/ */
bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{ {
sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); sint32 realRegisterData = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if (indexed) if (indexed)
realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
if (indexed && realRegisterMem == realRegisterMem2) if (indexed && realRegisterMem == realRegisterMem2)
{ {
@ -588,11 +583,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// registerResult = registerA // registerResult = registerA
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{ {
x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL)
{ {
// since MOV doesn't set eflags we need another test instruction // since MOV doesn't set eflags we need another test instruction
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult);
// set cr bits // set cr bits
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
} }
@ -603,7 +598,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
} }
else else
{ {
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
} }
else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP)
@ -611,24 +606,24 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// registerResult = endianSwap32(registerA) // registerResult = endianSwap32(registerA)
if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult)
assert_dbg(); assert_dbg();
x64Gen_bswap_reg64Lower32bit(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) else if( imlInstruction->operation == PPCREC_IML_OP_ADD )
{ {
// registerResult += registerA // registerResult += registerA
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 )
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC )
{ {
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult);
// set cr bits // set cr bits
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
} }
@ -645,17 +640,17 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
if( imlInstruction->operation == PPCREC_IML_OP_OR ) if( imlInstruction->operation == PPCREC_IML_OP_OR )
{ {
// registerResult |= registerA // registerResult |= registerA
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_AND ) else if( imlInstruction->operation == PPCREC_IML_OP_AND )
{ {
// registerResult &= registerA // registerResult &= registerA
x64Gen_and_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_and_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
else else
{ {
// registerResult ^= registerA // registerResult ^= registerA
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
@ -669,15 +664,15 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{ {
x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
// NOT destination register // NOT destination register
x64Gen_not_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult);
// update cr bits // update cr bits
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
// NOT instruction does not update flags, so we have to generate an additional TEST instruction // NOT instruction does not update flags, so we have to generate an additional TEST instruction
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult);
// set cr bits // set cr bits
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
} }
@ -690,20 +685,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5])
if(g_CPUFeatures.x86.lzcnt) if(g_CPUFeatures.x86.lzcnt)
{ {
x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
else else
{ {
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerA), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerA, imlInstruction->op_r_r.registerA);
sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex;
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult);
x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32-1); x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32-1);
sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex;
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex);
x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex);
} }
} }
@ -725,7 +720,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
else else
PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC);
// create compare instruction // create compare instruction
x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
// set cr bits // set cr bits
sint32 crRegister = imlInstruction->crRegister; sint32 crRegister = imlInstruction->crRegister;
if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED )
@ -757,10 +752,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{ {
x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
// NEG destination register // NEG destination register
x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult);
// update cr bits // update cr bits
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
@ -774,12 +769,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// copy operand to result if different registers // copy operand to result if different registers
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{ {
x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
// copy xer_ca to eflags carry // copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// add carry bit // add carry bit
x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 0); x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 0);
// update xer carry // update xer carry
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
@ -799,19 +794,19 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// copy operand to result if different registers // copy operand to result if different registers
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{ {
x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
} }
// copy xer_ca to eflags carry // copy xer_ca to eflags carry
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0);
// add carry bit // add carry bit
x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), (uint32)-1); x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1);
// update xer carry // update xer carry
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
// set cr bits // set cr bits
sint32 crRegister = imlInstruction->crRegister; sint32 crRegister = imlInstruction->crRegister;
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult);
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
} }
} }
@ -819,8 +814,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
{ {
// registerResult = ~registerOperand1 + carry // registerResult = ~registerOperand1 + carry
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r.registerA;
// copy operand to result register // copy operand to result register
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1);
// execute NOT on result // execute NOT on result
@ -850,12 +845,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
{ {
// registerResult = (uint32)(sint32)(sint16)registerA // registerResult = (uint32)(sint32)(sint16)registerA
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), reg32ToReg16(tempToRealRegister(imlInstruction->op_r_r.registerA))); x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA));
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC )
{ {
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult);
// set cr bits // set cr bits
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
} }
@ -871,8 +866,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
{ {
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerResult)); x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerResult);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8) for(sint32 f=0; f<0x20; f+=8)
@ -881,7 +876,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
else else
{ {
// calculate effective address // calculate effective address
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8) for(sint32 f=0; f<0x20; f+=8)
@ -902,7 +897,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = immS32 // registerResult = immS32
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) else if( imlInstruction->operation == PPCREC_IML_OP_ADD )
{ {
@ -912,7 +907,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
{ {
assert_dbg(); assert_dbg();
} }
x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
{ {
@ -927,13 +922,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
// update cr register // update cr register
assert_dbg(); assert_dbg();
} }
x64Gen_sub_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_AND ) else if( imlInstruction->operation == PPCREC_IML_OP_AND )
{ {
// registerResult &= immS32 // registerResult &= immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_and_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL )
@ -951,14 +946,14 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
// registerResult |= immS32 // registerResult |= immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_or_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) else if( imlInstruction->operation == PPCREC_IML_OP_XOR )
{ {
// registerResult ^= immS32 // registerResult ^= immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
x64Gen_xor_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
{ {
@ -967,7 +962,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( (imlInstruction->op_r_immS32.immS32&0x80) ) if( (imlInstruction->op_r_immS32.immS32&0x80) )
assert_dbg(); // should not happen assert_dbg(); // should not happen
x64Gen_rol_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint8)imlInstruction->op_r_immS32.immS32); x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
{ {
@ -989,7 +984,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
else else
PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC);
// create compare instruction // create compare instruction
x64Gen_cmp_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), imlInstruction->op_r_immS32.immS32); x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32);
// set cr bits // set cr bits
uint32 crRegister = imlInstruction->crRegister; uint32 crRegister = imlInstruction->crRegister;
if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED )
@ -1017,7 +1012,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) else if( imlInstruction->operation == PPCREC_IML_OP_MFCR )
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
uint32 destRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); uint32 destRegister = imlInstruction->op_r_immS32.registerIndex;
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister);
for(sint32 f=0; f<32; f++) for(sint32 f=0; f<32; f++)
{ {
@ -1028,7 +1023,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF)
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
uint32 srcRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex;
uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
for (sint32 f = 0; f < 32; f++) for (sint32 f = 0; f < 32; f++)
{ {
@ -1066,17 +1061,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR
{ {
if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
} }
@ -1084,17 +1079,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR
{ {
if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
} }
@ -1102,17 +1097,17 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR
{ {
if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT)
{ {
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
} }
@ -1120,9 +1115,9 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
if (imlInstruction->op_conditional_r_s32.bitMustBeSet) if (imlInstruction->op_conditional_r_s32.bitMustBeSet)
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
else else
x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
return true; return true;
} }
return false; return false;
@ -1134,9 +1129,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = registerOperand1 + registerOperand2 // registerResult = registerOperand1 + registerOperand2
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY; bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY;
if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
@ -1197,9 +1192,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = registerOperand1 - registerOperand2 // registerResult = registerOperand1 - registerOperand2
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
if( rRegOperand1 == rRegOperand2 ) if( rRegOperand1 == rRegOperand2 )
{ {
// result = operand1 - operand1 -> 0 // result = operand1 - operand1 -> 0
@ -1241,9 +1236,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = registerOperand1 - registerOperand2 + carry // registerResult = registerOperand1 - registerOperand2 + carry
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
if( rRegOperand1 == rRegOperand2 ) if( rRegOperand1 == rRegOperand2 )
{ {
// copy xer_ca to eflags carry // copy xer_ca to eflags carry
@ -1295,9 +1290,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = registerOperand1 * registerOperand2 // registerResult = registerOperand1 * registerOperand2
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
{ {
// be careful not to overwrite the operand before we use it // be careful not to overwrite the operand before we use it
@ -1334,9 +1329,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
return false; return false;
} }
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperandA = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperandA = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperandB = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperandB = imlInstruction->op_r_r_r.registerB;
// update carry flag // update carry flag
// carry flag is detected this way: // carry flag is detected this way:
//if ((~a+b) < a) { //if ((~a+b) < a) {
@ -1402,9 +1397,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits)
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
{ {
@ -1455,9 +1450,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
// todo: Use BMI2 rotate if available // todo: Use BMI2 rotate if available
// check if CL/ECX/RCX is available // check if CL/ECX/RCX is available
if( rRegResult != REG_RCX && rRegOperand1 != REG_RCX && rRegOperand2 != REG_RCX ) if( rRegResult != REG_RCX && rRegOperand1 != REG_RCX && rRegOperand2 != REG_RCX )
@ -1502,9 +1497,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits) // registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits)
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
// save cr // save cr
if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
{ {
@ -1576,9 +1571,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX);
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX);
@ -1621,9 +1616,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX);
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX);
@ -1669,9 +1664,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
{ {
// registerResult = registerOperand1 | ~registerOperand2 // registerResult = registerOperand1 | ~registerOperand2
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2);
x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP);
@ -1705,8 +1700,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
// registerResult = registerOperand + immS32 // registerResult = registerOperand + immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
if( rRegResult != rRegOperand ) if( rRegResult != rRegOperand )
{ {
@ -1719,8 +1714,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
{ {
// registerResult = registerOperand + immS32 // registerResult = registerOperand + immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
if( rRegResult != rRegOperand ) if( rRegResult != rRegOperand )
{ {
@ -1749,8 +1744,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
// registerResult = immS32 - registerOperand // registerResult = immS32 - registerOperand
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32; sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32;
if( rRegResult != rRegOperand ) if( rRegResult != rRegOperand )
{ {
@ -1799,23 +1794,23 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
// save cr // save cr
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
// copy rS to temporary register // copy rS to temporary register
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA);
// rotate destination register // rotate destination register
if( sh ) if( sh )
x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F);
// AND destination register with inverted mask // AND destination register with inverted mask
x64Gen_and_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), ~mask); x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, ~mask);
// AND temporary rS register with mask // AND temporary rS register with mask
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask); x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask);
// OR result with temporary // OR result with temporary
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), REG_RESV_TEMP); x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, REG_RESV_TEMP);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
{ {
// registerResult = registerOperand * immS32 // registerResult = registerOperand * immS32
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32;
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize
if( rRegResult != rRegOperand ) if( rRegResult != rRegOperand )
@ -1829,7 +1824,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32; uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32;
// MOV registerResult, registerOperand (if different) // MOV registerResult, registerOperand (if different)
if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult )
x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA);
// todo: Detect if we don't need to update carry // todo: Detect if we don't need to update carry
// generic case // generic case
// TEST registerResult, (1<<(SH+1))-1 // TEST registerResult, (1<<(SH+1))-1
@ -1838,11 +1833,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
caTestMask = 0x7FFFFFFF; caTestMask = 0x7FFFFFFF;
else else
caTestMask = (1 << (sh)) - 1; caTestMask = (1 << (sh)) - 1;
x64Gen_test_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), caTestMask); x64Gen_test_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, caTestMask);
// SETNE/NZ [ESP+XER_CA] // SETNE/NZ [ESP+XER_CA]
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
// SAR registerResult, SH // SAR registerResult, SH
x64Gen_sar_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), sh); x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh);
// JNS <skipInstruction> (if sign not set) // JNS <skipInstruction> (if sign not set)
sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex;
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here
@ -1854,7 +1849,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{ {
sint32 crRegister = imlInstruction->crRegister; sint32 crRegister = imlInstruction->crRegister;
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT));
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT));
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ));
@ -1866,17 +1861,17 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
// MOV registerResult, registerOperand (if different) // MOV registerResult, registerOperand (if different)
if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult )
x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA);
// Shift // Shift
if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ) if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT )
x64Gen_shl_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32);
else else
x64Gen_shr_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32);
// CR update // CR update
if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
{ {
// since SHL/SHR only modifies the OF flag we need another TEST reg,reg here // since SHL/SHR only modifies the OF flag we need another TEST reg,reg here
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerResult);
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction);
} }
} }
@ -1894,7 +1889,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
{ {
// jump always // jump always
cemu_assert_debug(imlSegment->nextSegmentBranchTaken); cemu_assert_debug(imlSegment->nextSegmentBranchTaken);
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmp_imm32(x64GenContext, 0); x64Gen_jmp_imm32(x64GenContext, 0);
} }
else else
@ -1904,7 +1899,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 )
{ {
// temporary cr is used, which means we use the currently active eflags // temporary cr is used, which means we use the currently active eflags
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
sint32 condition = imlInstruction->op_conditionalJump.condition; sint32 condition = imlInstruction->op_conditionalJump.condition;
if( condition == PPCREC_JUMP_CONDITION_E ) if( condition == PPCREC_JUMP_CONDITION_E )
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
@ -1922,19 +1917,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
{ {
if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0);
return true; return true;
} }
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
return true; return true;
} }
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0);
return true; return true;
} }
@ -1943,19 +1938,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
{ {
if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0);
return true; return true;
} }
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
return true; return true;
} }
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0);
return true; return true;
} }
@ -1964,19 +1959,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
{ {
if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0);
return true; return true;
} }
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
return true; return true;
} }
else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
{ {
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0);
return true; return true;
} }
@ -1985,7 +1980,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec
} }
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
cemu_assert_debug(imlSegment->GetBranchTaken()); cemu_assert_debug(imlSegment->GetBranchTaken());
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, (void*)imlSegment->GetBranchTaken()); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken());
if( imlInstruction->op_conditionalJump.bitMustBeSet ) if( imlInstruction->op_conditionalJump.bitMustBeSet )
{ {
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
@ -2009,7 +2004,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction
// BT // BT
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative
cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken());
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, x64GenContext->currentSegment->GetBranchTaken()); PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken());
x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0);
return true; return true;
} }
@ -2063,28 +2058,28 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
uint32 name = imlInstruction->op_r_name.name; uint32 name = imlInstruction->op_r_name.name;
if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 )
{ {
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0));
} }
else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 )
{ {
sint32 sprIndex = (name - PPCREC_NAME_SPR0); sint32 sprIndex = (name - PPCREC_NAME_SPR0);
if (sprIndex == SPR_LR) if (sprIndex == SPR_LR)
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.LR));
else if (sprIndex == SPR_CTR) else if (sprIndex == SPR_CTR)
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR));
else if (sprIndex == SPR_XER) else if (sprIndex == SPR_XER)
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, spr.XER));
else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
{ {
sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, memOffset); x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, memOffset);
} }
else else
assert_dbg(); assert_dbg();
} }
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{ {
x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
} }
else else
assert_dbg(); assert_dbg();
@ -2095,28 +2090,28 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
uint32 name = imlInstruction->op_r_name.name; uint32 name = imlInstruction->op_r_name.name;
if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 )
{ {
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.registerIndex);
} }
else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 )
{ {
uint32 sprIndex = (name - PPCREC_NAME_SPR0); uint32 sprIndex = (name - PPCREC_NAME_SPR0);
if (sprIndex == SPR_LR) if (sprIndex == SPR_LR)
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.registerIndex);
else if (sprIndex == SPR_CTR) else if (sprIndex == SPR_CTR)
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.registerIndex);
else if (sprIndex == SPR_XER) else if (sprIndex == SPR_XER)
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.registerIndex);
else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
{ {
sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, tempToRealRegister(imlInstruction->op_r_name.registerIndex)); x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, imlInstruction->op_r_name.registerIndex);
} }
else else
assert_dbg(); assert_dbg();
} }
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{ {
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex);
} }
else else
assert_dbg(); assert_dbg();
@ -2338,70 +2333,44 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
// fix relocs // fix relocs
for(auto& relocIt : x64GenContext.relocateOffsetTable2) for(auto& relocIt : x64GenContext.relocateOffsetTable2)
{ {
if(relocIt.type == X64_RELOC_LINK_TO_PPC || relocIt.type == X64_RELOC_LINK_TO_SEGMENT) // search for segment that starts with this offset
uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo;
uint32 x64Offset = 0xFFFFFFFF;
IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo;
x64Offset = destSegment->x64Offset;
uint32 relocBase = relocIt.offset;
uint8* relocInstruction = x64GenContext.codeBuffer+relocBase;
if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) )
{ {
// if link to PPC, search for segment that starts with this offset // Jcc relativeImm32
uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo; sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2));
uint32 x64Offset = 0xFFFFFFFF; if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled
if (relocIt.type == X64_RELOC_LINK_TO_PPC)
{ {
cemu_assert_suspicious(); // convert to near Jcc
//for (IMLSegment* segIt : ppcImlGenContext->segmentList2) *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70);
//{ // patch offset
// if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump;
// { // replace unused 4 bytes with NOP instruction
// x64Offset = segIt->x64Offset; relocInstruction[2] = 0x0F;
// break; relocInstruction[3] = 0x1F;
// } relocInstruction[4] = 0x40;
//} relocInstruction[5] = 0x00;
//if (x64Offset == 0xFFFFFFFF)
//{
// debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress);
// // todo: Cleanup
// return false;
//}
} }
else else
{ {
IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; // patch offset
x64Offset = destSegment->x64Offset; *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6));
} }
uint32 relocBase = relocIt.offset; }
uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; else if( relocInstruction[0] == 0xE9 )
if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) {
{ // JMP relativeImm32
// Jcc relativeImm32 *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5));
sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2));
if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled
{
// convert to near Jcc
*(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70);
// patch offset
*(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump;
// replace unused 4 bytes with NOP instruction
relocInstruction[2] = 0x0F;
relocInstruction[3] = 0x1F;
relocInstruction[4] = 0x40;
relocInstruction[5] = 0x00;
}
else
{
// patch offset
*(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6));
}
}
else if( relocInstruction[0] == 0xE9 )
{
// JMP relativeImm32
*(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5));
}
else
assert_dbg();
} }
else else
{
assert_dbg(); assert_dbg();
}
} }
// copy code to executable memory // copy code to executable memory

View file

@ -3,10 +3,9 @@
struct x64RelocEntry_t struct x64RelocEntry_t
{ {
x64RelocEntry_t(uint32 offset, uint8 type, void* extraInfo) : offset(offset), type(type), extraInfo(extraInfo) {}; x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {};
uint32 offset; uint32 offset;
uint8 type;
void* extraInfo; void* extraInfo;
}; };
@ -97,10 +96,6 @@ struct x64GenContext_t
#define REG_RESV_FPR_TEMP (15) #define REG_RESV_FPR_TEMP (15)
extern sint32 x64Gen_registerMap[12];
#define tempToRealRegister(__x) (x64Gen_registerMap[__x])
#define tempToRealFPRRegister(__x) (__x)
#define reg32ToReg16(__x) (__x) #define reg32ToReg16(__x) (__x)
enum enum
@ -128,9 +123,6 @@ enum
#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) #define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI)
#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) #define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI)
#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset
#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment
#define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_GPR_USABLE_REGISTERS (16-4)
#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register

View file

@ -10,11 +10,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunct
uint32 name = imlInstruction->op_r_name.name; uint32 name = imlInstruction->op_r_name.name;
if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) )
{ {
x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0));
} }
else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) )
{ {
x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); x64Gen_movupd_xmmReg_memReg128(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0));
} }
else else
{ {
@ -27,11 +27,11 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunct
uint32 name = imlInstruction->op_r_name.name; uint32 name = imlInstruction->op_r_name.name;
if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) )
{ {
x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0));
} }
else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) )
{ {
x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); x64Gen_movupd_memReg128_xmmReg(x64GenContext, imlInstruction->op_r_name.registerIndex, REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0));
} }
else else
{ {
@ -268,11 +268,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if( indexed ) if( indexed )
realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
uint8 mode = imlInstruction->op_storeLoad.mode; uint8 mode = imlInstruction->op_storeLoad.mode;
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ) if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 )
@ -384,7 +384,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 || else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ||
mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
{ {
PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR);
} }
else else
{ {
@ -579,11 +579,11 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{ {
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if( indexed ) if( indexed )
realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
uint8 mode = imlInstruction->op_storeLoad.mode; uint8 mode = imlInstruction->op_storeLoad.mode;
if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 ) if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 )
{ {
@ -670,7 +670,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 || else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 ||
mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
{ {
PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR);
} }
else else
{ {

View file

@ -24,9 +24,6 @@ void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcIml
void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext);
// register allocator
void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
// debug // debug
void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext);

View file

@ -6,104 +6,6 @@
#include "../PPCRecompilerIml.h" #include "../PPCRecompilerIml.h"
#include "../BackendX64/BackendX64.h" #include "../BackendX64/BackendX64.h"
struct replacedRegisterTracker_t
{
struct
{
sint32 instructionIndex;
sint32 registerPreviousName;
sint32 registerNewName;
sint32 index; // new index
sint32 previousIndex; // previous index (always out of range)
bool nameMustBeMaintained; // must be stored before replacement and loaded after replacement ends
}replacedRegisterEntry[PPC_X64_GPR_USABLE_REGISTERS];
sint32 count;
};
bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed)
{
IMLUsedRegisters registersUsed;
imlSegment->imlList[imlIndexStart].CheckRegisterUsage(&registersUsed);
// mask all registers used by this instruction
uint32 instructionReservedRegisterMask = 0;
if( registersUsed.readNamedReg1 != -1 )
instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg1));
if( registersUsed.readNamedReg2 != -1 )
instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg2));
if( registersUsed.readNamedReg3 != -1 )
instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg3));
if( registersUsed.writtenNamedReg1 != -1 )
instructionReservedRegisterMask |= (1<<(registersUsed.writtenNamedReg1));
// mask all registers that are reserved for other replacements
uint32 replacementReservedRegisterMask = 0;
for(sint32 i=0; i<replacedRegisterTracker->count; i++)
{
replacementReservedRegisterMask |= (1<<replacedRegisterTracker->replacedRegisterEntry[i].index);
}
// potential improvement: Scan ahead a few instructions and look for registers that are the least used (or ideally never used)
// pick available register
const uint32 allRegisterMask = (1<<(PPC_X64_GPR_USABLE_REGISTERS+1))-1; // mask with set bit for every register
uint32 reservedRegisterMask = instructionReservedRegisterMask | replacementReservedRegisterMask;
cemu_assert(instructionReservedRegisterMask != allRegisterMask); // no usable register! (Need to store a register from the replacedRegisterTracker)
sint32 usedRegisterIndex = -1;
for(sint32 i=0; i<PPC_X64_GPR_USABLE_REGISTERS; i++)
{
if( (reservedRegisterMask&(1<<i)) == 0 )
{
if( (instructionReservedRegisterMask&(1<<i)) == 0 && ppcImlGenContext->mappedRegister[i] != -1 )
{
// register is reserved by segment -> In use
*isUsed = true;
*registerName = ppcImlGenContext->mappedRegister[i];
}
else
{
*isUsed = false;
*registerName = -1;
}
*registerIndex = i;
return true;
}
}
return false;
}
void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, replacedRegisterTracker_t* replacedRegisterTracker, sint32 registerTrackerIndex, sint32* imlIndex)
{
// store register
sint32 imlIndexEdit = *imlIndex;
PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1);
// name_unusedRegister = unusedRegister
IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit + 0];
memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr.type = PPCREC_IML_TYPE_NAME_R;
imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER;
imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index;
imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerNewName;
imlIndexEdit++;
// load new register if required
if( replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].nameMustBeMaintained )
{
PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1);
IMLInstruction& imlInstructionItr = imlSegment->imlList[imlIndexEdit];
memset(&imlInstructionItr, 0x00, sizeof(IMLInstruction));
imlInstructionItr.type = PPCREC_IML_TYPE_R_NAME;
imlInstructionItr.crRegister = PPC_REC_INVALID_REGISTER;
imlInstructionItr.operation = PPCREC_IML_OP_ASSIGN;
imlInstructionItr.op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index;
imlInstructionItr.op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerPreviousName;//ppcImlGenContext->mappedRegister[replacedRegisterTracker.replacedRegisterEntry[i].index];
imlIndexEdit += 1;
}
// move last entry to current one
memcpy(replacedRegisterTracker->replacedRegisterEntry+registerTrackerIndex, replacedRegisterTracker->replacedRegisterEntry+replacedRegisterTracker->count-1, sizeof(replacedRegisterTracker->replacedRegisterEntry[0]));
replacedRegisterTracker->count--;
*imlIndex = imlIndexEdit;
}
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext)
{ {
// only xmm0 to xmm14 may be used, xmm15 is reserved // only xmm0 to xmm14 may be used, xmm15 is reserved

View file

@ -2,10 +2,16 @@
#include "../PPCRecompiler.h" #include "../PPCRecompiler.h"
#include "../PPCRecompilerIml.h" #include "../PPCRecompilerIml.h"
#include "IMLRegisterAllocator.h"
#include "IMLRegisterAllocatorRanges.h" #include "IMLRegisterAllocatorRanges.h"
#include "../BackendX64/BackendX64.h" #include "../BackendX64/BackendX64.h"
struct IMLRegisterAllocatorContext
{
IMLRegisterAllocatorParameters* raParam;
};
uint32 recRACurrentIterationIndex = 0; uint32 recRACurrentIterationIndex = 0;
uint32 PPCRecRA_getNextIterationIndex() uint32 PPCRecRA_getNextIterationIndex()
@ -212,10 +218,10 @@ typedef struct
sint32 liveRangesCount; sint32 liveRangesCount;
}raLiveRangeInfo_t; }raLiveRangeInfo_t;
// return a bitmask that contains only registers that are not used by any colliding range // mark occupied registers by any overlapping range as unavailable in physRegSet
uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range) void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet)
{ {
uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1; //uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1;
for (auto& subrange : range->list_subranges) for (auto& subrange : range->list_subranges)
{ {
IMLSegment* imlSegment = subrange->imlSegment; IMLSegment* imlSegment = subrange->imlSegment;
@ -233,14 +239,13 @@ uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range)
(subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) ||
(subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) )
{ {
if(subrangeItr->range->physicalRegister >= 0) if (subrangeItr->range->physicalRegister >= 0)
physRegisterMask &= ~(1<<(subrangeItr->range->physicalRegister)); physRegSet.SetReserved(subrangeItr->range->physicalRegister);
} }
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_segmentSubrangesGPR.next;
} }
} }
return physRegisterMask;
} }
bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; } bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; }
@ -326,7 +331,7 @@ void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegm
// todo // todo
} }
bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{ {
// sort subranges ascending by start index // sort subranges ascending by start index
_sortSegmentAllSubrangesLinkedList(imlSegment); _sortSegmentAllSubrangesLinkedList(imlSegment);
@ -380,24 +385,22 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
continue; continue;
} }
// find free register for this segment // find free register for this segment
uint32 physRegisterMask = (1<<PPC_X64_GPR_USABLE_REGISTERS)-1; IMLPhysRegisterSet physRegSet = ctx.raParam->physicalRegisterPool;
for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
{ {
raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f];
if (liverange->range->physicalRegister < 0) cemu_assert_debug(liverange->range->physicalRegister >= 0);
assert_dbg(); physRegSet.SetReserved(liverange->range->physicalRegister);
physRegisterMask &= ~(1<<liverange->range->physicalRegister);
} }
// check intersections with other ranges and determine allowed registers // check intersections with other ranges and determine allowed registers
uint32 allowedPhysRegisterMask = 0; IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
uint32 unusedRegisterMask = physRegisterMask; // mask of registers that are currently not used (does not include range checks) if(physRegSet.HasAnyAvailable())
if (physRegisterMask != 0)
{ {
// check globally // check globally in all segments
allowedPhysRegisterMask = PPCRecRA_getAllowedRegisterMaskForFullRange(subrangeItr->range); PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr->range, physRegSet);
physRegisterMask &= allowedPhysRegisterMask;
} }
if (physRegisterMask == 0) if (!physRegSet.HasAnyAvailable())
{ {
struct struct
{ {
@ -480,14 +483,16 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
spillStrategies.availableRegisterHole.physRegister = -1; spillStrategies.availableRegisterHole.physRegister = -1;
if (currentIndex >= 0) if (currentIndex >= 0)
{ {
if (unusedRegisterMask != 0) if (localAvailableRegsMask.HasAnyAvailable())
{ {
for (sint32 t = 0; t < PPC_X64_GPR_USABLE_REGISTERS; t++) sint32 physRegItr = -1;
while (true)
{ {
if ((unusedRegisterMask&(1 << t)) == 0) physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1);
continue; if (physRegItr < 0)
break;
// get size of potential hole for this register // get size of potential hole for this register
sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, t); sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, physRegItr);
if (distance < 2) if (distance < 2)
continue; // not worth consideration continue; // not worth consideration
// calculate additional cost due to split // calculate additional cost due to split
@ -500,7 +505,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
{ {
spillStrategies.availableRegisterHole.cost = cost; spillStrategies.availableRegisterHole.cost = cost;
spillStrategies.availableRegisterHole.distance = distance; spillStrategies.availableRegisterHole.distance = distance;
spillStrategies.availableRegisterHole.physRegister = t; spillStrategies.availableRegisterHole.physRegister = physRegItr;
} }
} }
} }
@ -611,16 +616,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
return false; return false;
} }
// assign register to range // assign register to range
sint32 registerIndex = -1; subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg();
for (sint32 f = 0; f < PPC_X64_GPR_USABLE_REGISTERS; f++)
{
if ((physRegisterMask&(1 << f)) != 0)
{
registerIndex = f;
break;
}
}
subrangeItr->range->physicalRegister = registerIndex;
// add to live ranges // add to live ranges
liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr;
liveInfo.liveRangesCount++; liveInfo.liveRangesCount++;
@ -630,7 +626,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe
return true; return true;
} }
void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) void PPCRecRA_assignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext)
{ {
// start with frequently executed segments first // start with frequently executed segments first
sint32 maxLoopDepth = 0; sint32 maxLoopDepth = 0;
@ -647,7 +643,7 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext)
{ {
if (segIt->loopDepth != d) if (segIt->loopDepth != d)
continue; continue;
done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, segIt); done = PPCRecRA_assignSegmentRegisters(ctx, ppcImlGenContext, segIt);
if (done == false) if (done == false)
break; break;
} }
@ -997,8 +993,11 @@ void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGen
} }
} }
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext) void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
{ {
IMLRegisterAllocatorContext ctx;
ctx.raParam = &raParam;
PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext); PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext);
ppcImlGenContext->raInfo.list_ranges = std::vector<raLivenessRange_t*>(); ppcImlGenContext->raInfo.list_ranges = std::vector<raLivenessRange_t*>();
@ -1006,7 +1005,7 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext
PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext);
PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext);
PPCRecRA_assignRegisters(ppcImlGenContext); PPCRecRA_assignRegisters(ctx, ppcImlGenContext);
PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext); PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext);
PPCRecRA_generateMoveInstructions(ppcImlGenContext); PPCRecRA_generateMoveInstructions(ppcImlGenContext);

View file

@ -0,0 +1,94 @@
// container for storing a set of register indices
// specifically optimized towards storing physical register indices (expected to be below 64)
class IMLPhysRegisterSet
{
public:
void SetAvailable(uint32 index)
{
cemu_assert_debug(index < 64);
m_regBitmask |= ((uint64)1 << index);
}
void SetReserved(uint32 index)
{
cemu_assert_debug(index < 64);
m_regBitmask &= ~((uint64)1 << index);
}
bool IsAvailable(uint32 index) const
{
return (m_regBitmask & (1 << index)) != 0;
}
IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
{
this->m_regBitmask &= other.m_regBitmask;
return *this;
}
IMLPhysRegisterSet& operator=(const IMLPhysRegisterSet& other)
{
this->m_regBitmask = other.m_regBitmask;
return *this;
}
bool HasAnyAvailable() const
{
return m_regBitmask != 0;
}
// returns index of first available register. Do not call when HasAnyAvailable() == false
uint32 GetFirstAvailableReg()
{
cemu_assert_debug(m_regBitmask != 0);
uint32 regIndex = 0;
auto tmp = m_regBitmask;
while ((tmp & 0xFF) == 0)
{
regIndex += 8;
tmp >>= 8;
}
while ((tmp & 0x1) == 0)
{
regIndex++;
tmp >>= 1;
}
return regIndex;
}
// returns index of next available register (search includes any register index >= startIndex)
// returns -1 if there is no more register
sint32 GetNextAvailableReg(sint32 startIndex)
{
if (startIndex >= 64)
return -1;
uint32 regIndex = startIndex;
auto tmp = m_regBitmask;
tmp >>= regIndex;
if (!tmp)
return -1;
while ((tmp & 0xFF) == 0)
{
regIndex += 8;
tmp >>= 8;
}
while ((tmp & 0x1) == 0)
{
regIndex++;
tmp >>= 1;
}
return regIndex;
}
private:
uint64 m_regBitmask{ 0 };
};
struct IMLRegisterAllocatorParameters
{
IMLPhysRegisterSet physicalRegisterPool;
};
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam);

View file

@ -14,6 +14,7 @@
#include "util/MemMapper/MemMapper.h" #include "util/MemMapper/MemMapper.h"
#include "IML/IML.h" #include "IML/IML.h"
#include "IML/IMLRegisterAllocator.h"
#include "BackendX64/BackendX64.h" #include "BackendX64/BackendX64.h"
struct PPCInvalidationRange struct PPCInvalidationRange
@ -272,7 +273,21 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
} }
} }
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext); IMLRegisterAllocatorParameters raParam;
raParam.physicalRegisterPool.SetAvailable(REG_RAX);
raParam.physicalRegisterPool.SetAvailable(REG_RDX);
raParam.physicalRegisterPool.SetAvailable(REG_RBX);
raParam.physicalRegisterPool.SetAvailable(REG_RBP);
raParam.physicalRegisterPool.SetAvailable(REG_RSI);
raParam.physicalRegisterPool.SetAvailable(REG_RDI);
raParam.physicalRegisterPool.SetAvailable(REG_R8);
raParam.physicalRegisterPool.SetAvailable(REG_R9);
raParam.physicalRegisterPool.SetAvailable(REG_R10);
raParam.physicalRegisterPool.SetAvailable(REG_R11);
raParam.physicalRegisterPool.SetAvailable(REG_R12);
raParam.physicalRegisterPool.SetAvailable(REG_RCX);
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam);
// remove redundant name load and store instructions // remove redundant name load and store instructions
PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);