PPCRec: Initial support for typed registers

Exzap 2023-02-02 17:18:36 +01:00
parent 4c948d4a51
commit e9c161c508
12 changed files with 1128 additions and 959 deletions
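The diff below replaces raw sint8/sint32 physical register IDs throughout the x64 backend with the typed IMLReg value, which pairs a register format with a register ID so each emitter path can assert that it was handed a register of the expected width. The following is a rough sketch of the shape implied by the accessors used in this commit (GetRegFormat(), GetRegID(), the IMLRegID alias). The actual definition lives in the IML headers; the member layout, enum values and integer widths shown here are assumptions.

// Sketch only: layout, enum members and typedef widths are assumptions inferred
// from the GetRegFormat()/GetRegID() calls and IMLRegFormat::I32/I64 checks below.
enum class IMLRegFormat : uint8
{
	INVALID,
	I32,
	I64,
	// further formats (e.g. floating point) may exist but are not used in this file
};

using IMLRegID = uint16; // assumed: small integer index of the assigned (physical) register

struct IMLReg
{
	IMLRegFormat format;
	IMLRegID id;

	IMLRegFormat GetRegFormat() const { return format; }
	IMLRegID GetRegID() const { return id; }
};

The _reg32()/_reg8()/_reg64() helpers at the top of the first hunk then check the format with cemu_assert_debug() before casting GetRegID() to the emitter's physical register enums.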


@@ -8,14 +8,22 @@
#include "util/MemMapper/MemMapper.h"
#include "Common/cpu_features.h"
static x86Assembler64::GPR32 _reg32(sint8 physRegId)
static x86Assembler64::GPR32 _reg32(IMLReg physReg)
{
return (x86Assembler64::GPR32)physRegId;
cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32);
return (x86Assembler64::GPR32)physReg.GetRegID();
}
static x86Assembler64::GPR8_REX _reg8(sint8 physRegId)
static uint32 _reg64(IMLReg physReg)
{
return (x86Assembler64::GPR8_REX)physRegId;
cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64);
return physReg.GetRegID();
}
static x86Assembler64::GPR8_REX _reg8(IMLReg physReg)
{
cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); // for now these are represented as 32bit
return (x86Assembler64::GPR8_REX)physReg.GetRegID();
}
static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId)
@@ -28,6 +36,11 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
return (x86Assembler64::GPR8_REX)regId;
}
static x86Assembler64::GPR8_REX _reg8_from_reg64(uint32 regId)
{
return (x86Assembler64::GPR8_REX)regId;
}
static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId)
{
return (x86Assembler64::GPR64)regId;
@@ -132,7 +145,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
{
if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
{
uint32 branchDstReg = imlInstruction->op_macro.param;
uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg);
if(X86_REG_RDX != branchDstReg)
x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg);
// potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX
@@ -334,11 +347,16 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction,
*/
bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{
sint32 realRegisterData = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
if (indexed)
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
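// data and memory operands are still 32-bit typed IML registers at this point;
// only their raw register IDs are handed on to the x64 emitter helpers below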
IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if( indexed )
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
if( indexed && realRegisterMem == realRegisterMem2 )
{
return false;
@@ -439,11 +457,16 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
*/
bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
{
sint32 realRegisterData = imlInstruction->op_storeLoad.registerData;
sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32);
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32);
if (indexed)
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2;
cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32);
IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID();
IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID();
IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER;
if (indexed)
realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID();
if (indexed && realRegisterMem == realRegisterMem2)
{
@@ -542,39 +565,42 @@ bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto regR = _reg32(imlInstruction->op_r_r.regR);
auto regA = _reg32(imlInstruction->op_r_r.regA);
if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
{
// registerResult = registerA
if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
if (regR != regA)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA);
}
else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP)
{
if (imlInstruction->op_r_r.regA != imlInstruction->op_r_r.regR)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA); // if movbe is available we can move and swap in a single instruction?
x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.regR);
if (regA != regR)
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); // if movbe is available we can move and swap in a single instruction?
x64Gen_bswap_reg64Lower32bit(x64GenContext, regR);
}
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 )
{
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, regR, regA);
}
else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32)
{
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.regR, reg32ToReg16(imlInstruction->op_r_r.regA));
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, regR, reg32ToReg16(regA));
}
else if( imlInstruction->operation == PPCREC_IML_OP_NOT )
{
// copy register content if different registers
if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA )
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR);
if( regR != regA )
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_not_reg64Low32(x64GenContext, regR);
}
else if (imlInstruction->operation == PPCREC_IML_OP_NEG)
{
// copy register content if different registers
if (imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA)
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR);
if (regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_neg_reg64Low32(x64GenContext, regR);
}
else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW )
{
@@ -582,29 +608,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// LZCNT instruction (part of ABM, CPUID.80000001H:ECX.ABM[Bit 5])
if(g_CPUFeatures.x86.lzcnt)
{
x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, regR, regA);
}
else
{
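// fallback without LZCNT: for a non-zero input BSR returns the index of the highest
// set bit, so cntlzw = 31 - bsr(value); the NEG + ADD 31 sequence below computes that.
// BSR is undefined for an input of 0, so that case is tested first and yields 32.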
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regA, imlInstruction->op_r_r.regA);
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, regA, regA);
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0);
x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR, imlInstruction->op_r_r.regA);
x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.regR);
x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32-1);
x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, regR, regA);
x64Gen_neg_reg64Low32(x64GenContext, regR);
x64Gen_add_reg64Low32_imm32(x64GenContext, regR, 32-1);
sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.regR, 32);
x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, 32);
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
}
}
else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ )
{
if( imlInstruction->op_r_r.regR != imlInstruction->op_r_r.regA )
if( regR != regA )
{
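// indexed case: effective address = regA + regR, aligned down to the 32-byte cache
// block via ~0x1F, then translated through REG_RESV_MEMBASE before the block is cleared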
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regR);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regR);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8)
@@ -613,7 +639,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
else
{
// calculate effective address
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.regA);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8)
@@ -630,15 +656,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto regR = _reg32(imlInstruction->op_r_immS32.regR);
if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
{
x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.regR, (uint32)imlInstruction->op_r_immS32.immS32);
x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, (uint32)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
{
if( (imlInstruction->op_r_immS32.immS32&0x80) )
assert_dbg(); // should not happen
x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.regR, (uint8)imlInstruction->op_r_immS32.immS32);
cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0);
x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MFCR )
{
@@ -698,12 +725,13 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCR
bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR);
auto rRegOperand1 = _reg32(imlInstruction->op_r_r_r.regA);
auto rRegOperand2 = _reg32(imlInstruction->op_r_r_r.regB);
if (imlInstruction->operation == PPCREC_IML_OP_ADD)
{
// registerResult = registerOperand1 + registerOperand2
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
{
// be careful not to overwrite the operand before we use it
@@ -721,9 +749,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if( rRegOperand1 == rRegOperand2 )
{
// result = operand1 - operand1 -> 0
@@ -748,28 +773,22 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegA = imlInstruction->op_r_r_r.regA;
sint32 rRegB = imlInstruction->op_r_r_r.regB;
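// commutative operations: if the destination aliases the second operand, swap the
// operands so the initial mov cannot clobber a source value that is still needed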
if (rRegResult == rRegB)
std::swap(rRegA, rRegB);
if (rRegResult == rRegOperand2)
std::swap(rRegOperand1, rRegOperand2);
if (rRegResult != rRegA)
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegA);
if (rRegResult != rRegOperand1)
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
if (imlInstruction->operation == PPCREC_IML_OP_OR)
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
else if (imlInstruction->operation == PPCREC_IML_OP_AND)
x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
else
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB);
x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
{
// registerResult = registerOperand1 * registerOperand2
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) )
{
// be careful not to overwrite the operand before we use it
@@ -789,9 +808,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW )
{
// registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits)
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
{
@@ -831,9 +847,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
// todo: Use BMI2 rotate if available
// check if CL/ECX/RCX is available
if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX )
@@ -872,10 +885,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
// since our register allocator doesn't support instruction based fixed phys registers yet
// we'll instead have to temporarily shuffle registers around
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
// we use BMI2's shift instructions until the RA can assign fixed registers
if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
{
@@ -947,10 +956,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
// mov operand 2 to temp register
@@ -981,10 +986,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
{
sint32 rRegResult = imlInstruction->op_r_r_r.regR;
sint32 rRegOperand1 = imlInstruction->op_r_r_r.regA;
sint32 rRegOperand2 = imlInstruction->op_r_r_r.regB;
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX);
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX);
// mov operand 2 to temp register
@@ -1102,37 +1103,35 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction,
bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
sint32 regResult = imlInstruction->op_r_r_s32.regR;
sint32 regOperand = imlInstruction->op_r_r_s32.regA;
auto regR = _reg32(imlInstruction->op_r_r_s32.regR);
auto regA = _reg32(imlInstruction->op_r_r_s32.regA);
uint32 immS32 = imlInstruction->op_r_r_s32.immS32;
if( imlInstruction->operation == PPCREC_IML_OP_ADD )
{
sint32 rRegResult = imlInstruction->op_r_r_s32.regR;
sint32 rRegOperand = imlInstruction->op_r_r_s32.regA;
uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
if(regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_add_reg64Low32_imm32(x64GenContext, regResult, (uint32)immU32);
if(regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_add_reg64Low32_imm32(x64GenContext, regR, (uint32)immU32);
}
else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
{
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32);
if (regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_sub_reg64Low32_imm32(x64GenContext, regR, immS32);
}
else if (imlInstruction->operation == PPCREC_IML_OP_AND ||
imlInstruction->operation == PPCREC_IML_OP_OR ||
imlInstruction->operation == PPCREC_IML_OP_XOR)
{
if (regResult != regOperand)
x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
if (regR != regA)
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
if (imlInstruction->operation == PPCREC_IML_OP_AND)
x64Gen_and_reg64Low32_imm32(x64GenContext, regResult, immS32);
x64Gen_and_reg64Low32_imm32(x64GenContext, regR, immS32);
else if (imlInstruction->operation == PPCREC_IML_OP_OR)
x64Gen_or_reg64Low32_imm32(x64GenContext, regResult, immS32);
x64Gen_or_reg64Low32_imm32(x64GenContext, regR, immS32);
else // XOR
x64Gen_xor_reg64Low32_imm32(x64GenContext, regResult, immS32);
x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32);
}
else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI )
{
@@ -1143,41 +1142,39 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
uint32 sh = (vImm>>16)&0xFF;
uint32 mask = ppc_mask(mb, me);
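// rlwimi: regR = (rotl32(regA, sh) & mask) | (regR & ~mask)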
// copy rS to temporary register
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.regA);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
// rotate destination register
if( sh )
x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F);
// AND destination register with inverted mask
x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.regR, ~mask);
x64Gen_and_reg64Low32_imm32(x64GenContext, regR, ~mask);
// AND temporary rS register with mask
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask);
// OR result with temporary
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r_s32.regR, REG_RESV_TEMP);
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
{
// registerResult = registerOperand * immS32
sint32 rRegResult = imlInstruction->op_r_r_s32.regR;
sint32 rRegOperand = imlInstruction->op_r_r_s32.regA;
sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32;
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize
if( rRegResult != rRegOperand )
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand);
x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP);
if( regR != regA )
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP);
}
else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
{
if( imlInstruction->op_r_r_s32.regA != imlInstruction->op_r_r_s32.regR )
x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.regA);
if( regA != regR )
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32);
x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32);
x64Gen_shr_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
else // RIGHT_SHIFT_S
x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.regR, imlInstruction->op_r_r_s32.immS32);
x64Gen_sar_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
}
else
{
@@ -1236,50 +1233,52 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction
void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
auto regR = _reg64(imlInstruction->op_r_name.regR);
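// load the value backing this IML name (GPR, SPR, temporary, XER bit or CR byte)
// from the PPCInterpreter_t context into the assigned host register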
if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 )
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0));
}
else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 )
{
sint32 sprIndex = (name - PPCREC_NAME_SPR0);
if (sprIndex == SPR_LR)
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR));
else if (sprIndex == SPR_CTR)
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR));
else if (sprIndex == SPR_XER)
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER));
else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
{
sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, memOffset);
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, memOffset);
}
else
assert_dbg();
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY));
}
else if (name == PPCREC_NAME_XER_CA)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca));
}
else if (name == PPCREC_NAME_XER_SO)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so));
x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so));
}
else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
{
x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
x64Emit_movZX_reg64_mem8(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR));
}
else if (name == PPCREC_NAME_CPU_MEMRES_EA)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr));
}
else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
{
x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue));
x64Emit_mov_reg64_mem32(x64GenContext, regR, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue));
}
else
assert_dbg();
@@ -1288,50 +1287,52 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
uint32 name = imlInstruction->op_r_name.name;
auto regR = _reg64(imlInstruction->op_r_name.regR);
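// store the assigned host register back into the PPCInterpreter_t field backing this IML name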
if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 )
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), regR);
}
else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 )
{
uint32 sprIndex = (name - PPCREC_NAME_SPR0);
if (sprIndex == SPR_LR)
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.LR), regR);
else if (sprIndex == SPR_CTR)
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), regR);
else if (sprIndex == SPR_XER)
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), regR);
else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7)
{
sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, memOffset, regR);
}
else
assert_dbg();
}
else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR);
}
else if (name == PPCREC_NAME_XER_CA)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR)));
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR));
}
else if (name == PPCREC_NAME_XER_SO)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR)));
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR));
}
else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST)
{
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.regR)));
x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR));
}
else if (name == PPCREC_NAME_CPU_MEMRES_EA)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), regR);
}
else if (name == PPCREC_NAME_CPU_MEMRES_VAL)
{
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.regR);
x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), regR);
}
else
assert_dbg();
@@ -1432,16 +1433,12 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{
if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
{
codeGenerationFailed = true;
}
}
else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP)
{
if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false)
{
codeGenerationFailed = true;
}
}
else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK )
{