mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-05 14:31:17 +12:00
PPCRec: Rework FP instructions to use scalar registers
This commit is contained in:
parent
eacac7f6c4
commit
bb5a7ce4ff
13 changed files with 1185 additions and 785 deletions
|
@ -10,6 +10,18 @@ namespace Espresso
|
|||
CR_BIT_INDEX_SO = 3,
|
||||
};
|
||||
|
||||
// Data type selector for paired-single quantized (PSQ) loads.
// NOTE(review): the numeric values presumably mirror the hardware GQR type field, so they are kept explicit - confirm before renumbering.
enum class PSQ_LOAD_TYPE
{
	TYPE_F32     = 0, // 32-bit float
	TYPE_UNUSED1 = 1, // values 1..3 are placeholders that keep the numbering contiguous
	TYPE_UNUSED2 = 2,
	TYPE_UNUSED3 = 3,
	TYPE_U8      = 4, // unsigned 8-bit
	TYPE_U16     = 5, // unsigned 16-bit
	TYPE_S8      = 6, // signed 8-bit
	TYPE_S16     = 7, // signed 16-bit
};
|
||||
|
||||
enum class PrimaryOpcode
|
||||
{
|
||||
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)
|
||||
|
|
|
@ -1208,6 +1208,13 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
|
|||
{
|
||||
x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
|
||||
}
|
||||
else if (name >= PPCREC_NAME_FPR0_NEW && name < (PPCREC_NAME_FPR0_NEW + 64))
|
||||
{
|
||||
sint32 regIndex = (name - PPCREC_NAME_FPR0_NEW) / 2;
|
||||
sint32 pairIndex = (name - PPCREC_NAME_FPR0_NEW) % 2;
|
||||
x64Gen_movddup_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double));
|
||||
// todo - use movsd here
|
||||
}
|
||||
else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) // range check needs '&&'; with '||' the condition is true for every name
|
||||
{
|
||||
x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
|
||||
|
@ -1285,6 +1292,12 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
|
|||
{
|
||||
x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
|
||||
}
|
||||
else if (name >= PPCREC_NAME_FPR0_NEW && name < (PPCREC_NAME_FPR0_NEW + 64))
|
||||
{
|
||||
sint32 regIndex = (name - PPCREC_NAME_FPR0_NEW) / 2;
|
||||
sint32 pairIndex = (name - PPCREC_NAME_FPR0_NEW) % 2;
|
||||
x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0));
|
||||
}
|
||||
else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
|
||||
{
|
||||
x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
|
||||
|
|
|
@ -205,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
|
|||
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
|
@ -230,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
|
|||
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
|
||||
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
|
||||
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
|
|
|
@ -269,7 +269,8 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
|
||||
uint8 mode = imlInstruction->op_storeLoad.mode;
|
||||
|
||||
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 )
|
||||
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0 ) // lazy hack for now. Load only one value for SINGLE_INTO_PS0
|
||||
{
|
||||
// load byte swapped single into temporary FPR
|
||||
if( indexed )
|
||||
|
@ -682,6 +683,21 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg)
|
|||
// FPR op FPR
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
|
||||
{
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT )
|
||||
{
|
||||
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA);
|
||||
x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr);
|
||||
return;
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT )
|
||||
{
|
||||
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
|
||||
x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
|
||||
return;
|
||||
}
|
||||
// all other cases operate on two floating-point registers
|
||||
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
|
||||
|
||||
|
@ -1025,6 +1041,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
|
|||
{
|
||||
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE )
|
||||
{
|
||||
x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM )
|
||||
{
|
||||
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
|
||||
|
|
|
@ -213,6 +213,37 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
|
|||
}
|
||||
}
|
||||
|
||||
// Emits MOVSD <xmm>, [<base>+<imm32>] (SSE2 scalar double load, opcode F2 0F 10 /r).
// xmmRegister: destination XMM register index
// memRegister: base register; only RSP and register index 15 (R15) are handled, anything else triggers assert_dbg()
// memImmU32:   32-bit displacement added to the base register
void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	const bool baseIsRSP = (memRegister == X86_REG_RSP);
	const bool baseIsR15 = !baseIsRSP && (memRegister == 15);
	if (!baseIsRSP && !baseIsR15)
	{
		// no encoding implemented for other base registers
		assert_dbg();
		return;
	}
	// the low three bits of the xmm index go into the ModRM reg field (bits 3..5)
	const sint32 modrmRegField = (xmmRegister & 7) * 8;
	if (baseIsR15)
	{
		// the R15 form emits an extra 0x36 byte ahead of the mandatory F2 prefix
		// NOTE(review): 0x36 is the SS segment override encoding; its purpose here is not evident from this function - confirm
		x64Gen_writeU8(x64GenContext, 0x36);
	}
	x64Gen_writeU8(x64GenContext, 0xF2);
	// the RSP form passes 0 as the memory register to the prefix helper, the R15 form passes the register itself
	x64Gen_genSSEVEXPrefix2(x64GenContext, baseIsRSP ? 0 : memRegister, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x10);
	if (baseIsRSP)
	{
		// ModRM mod=10 rm=100 (SIB follows), then SIB 0x24 selecting RSP as base without index
		x64Gen_writeU8(x64GenContext, 0x84 + modrmRegField);
		x64Gen_writeU8(x64GenContext, 0x24);
	}
	else
	{
		// ModRM mod=10 rm=111 with 32-bit displacement
		x64Gen_writeU8(x64GenContext, 0x87 + modrmRegField);
	}
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
|
||||
{
|
||||
// SSE3
|
||||
|
@ -561,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis
|
|||
x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
|
||||
}
|
||||
|
||||
// Emits CVTSI2SD <xmmRegisterDest>, <registerSrc> (SSE2, opcode F2 0F 2A /r, register-direct form).
// The source parameter is named registerSrc (no xmm prefix); CVTSI2SD converts from an integer register.
void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// ModRM with mod=11: reg field holds the destination, rm field holds the source (low three bits each)
	const sint32 modrm = 0xC0 + (xmmRegisterDest & 7) * 8 + (registerSrc & 7);

	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2A);
	x64Gen_writeU8(x64GenContext, modrm);
}
|
||||
|
||||
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
|
||||
{
|
||||
// SSE2
|
||||
|
|
|
@ -189,10 +189,18 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di
|
|||
{
|
||||
strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
|
||||
}
|
||||
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999))
|
||||
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 32))
|
||||
{
|
||||
strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0);
|
||||
}
|
||||
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0_NEW && inst.op_r_name.name < (PPCREC_NAME_FPR0_NEW + 32*2))
{
	// names in this range count PS0 and PS1 separately: offset = fprIndex * 2 + pairIndex
	const auto fprNameOffset = inst.op_r_name.name - PPCREC_NAME_FPR0_NEW;
	// print the FPR index (the previous expression subtracted in the wrong order and printed a meaningless number)
	strOutput.addFmt("f{}", fprNameOffset / 2);
	if (fprNameOffset & 1)
		strOutput.add(".ps1");
	else
		strOutput.add(".ps0");
}
|
||||
else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
|
||||
{
|
||||
strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);
|
||||
|
|
|
@ -239,6 +239,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
registersUsed->readGPR2 = op_storeLoad.registerData;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0:
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
|
||||
|
@ -278,6 +279,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
registersUsed->readGPR3 = op_storeLoad.registerData;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0:
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
|
||||
|
@ -389,6 +391,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
registersUsed->readGPR2 = op_fpr_r_r.regR;
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
|
||||
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
|
||||
{
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
}
|
||||
else
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
@ -446,6 +454,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR1 = op_fpr_r.regR;
|
||||
registersUsed->writtenGPR1 = op_fpr_r.regR;
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE)
|
||||
{
|
||||
registersUsed->writtenGPR1 = op_fpr_r.regR;
|
||||
}
|
||||
else
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
|
|
@ -158,10 +158,14 @@ enum
|
|||
PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
|
||||
PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
|
||||
PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
|
||||
// Conversion (FPR_R_R)
|
||||
PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
|
||||
PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
|
||||
// PS
|
||||
PPCREC_IML_OP_FPR_SUM0,
|
||||
PPCREC_IML_OP_FPR_SUM1,
|
||||
|
||||
PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
|
||||
|
||||
// R_R_R only
|
||||
|
||||
|
@ -276,6 +280,7 @@ enum // IMLName
|
|||
PPCREC_NAME_R0 = 2000,
|
||||
PPCREC_NAME_SPR0 = 3000,
|
||||
PPCREC_NAME_FPR0 = 4000,
|
||||
PPCREC_NAME_FPR0_NEW = 4800, // similar to FPR0, but counting PS0 and PS1 separate. So fp3.ps1 is at offset 3 * 2 + 1
|
||||
PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
|
||||
PPCREC_NAME_XER_CA = 6000, // carry bit from XER
|
||||
PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
|
||||
|
@ -757,6 +762,12 @@ struct IMLInstruction
|
|||
// load from memory
|
||||
void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
||||
{
|
||||
if (registerGQR.IsValid())
|
||||
{
|
||||
if ( mode == 0)
|
||||
__debugbreak();
|
||||
}
|
||||
|
||||
this->type = PPCREC_IML_TYPE_FPR_LOAD;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerDestination;
|
||||
|
|
|
@ -209,7 +209,9 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32
|
|||
{
|
||||
// UGQR 2 to 7 are initialized by the OS and we assume that games won't ever permanently touch those
|
||||
// todo - hack - replace with more accurate solution
|
||||
if (gqrIndex == 2)
|
||||
if (gqrIndex == 0)
|
||||
gqrValue = 0x00000000;
|
||||
else if (gqrIndex == 2)
|
||||
gqrValue = 0x00040004;
|
||||
else if (gqrIndex == 3)
|
||||
gqrValue = 0x00050005;
|
||||
|
|
|
@ -2093,7 +2093,10 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM
|
|||
cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
|
||||
if (imlSegment->HasSuffixInstruction())
|
||||
{
|
||||
cemu_assert_debug(!currentRange); // currentRange should be NULL?
|
||||
if (currentRange)
|
||||
{
|
||||
cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress);
|
||||
}
|
||||
for (auto& remainingRange : activeRanges)
|
||||
{
|
||||
cemu_assert_debug(!remainingRange->hasStore);
|
||||
|
|
|
@ -53,14 +53,10 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
|
||||
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
|
||||
bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
|
||||
bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
|
||||
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
|
@ -88,3 +84,19 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o
|
|||
// IML general
|
||||
|
||||
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken);
|
||||
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count);
|
||||
|
||||
|
||||
class IMLRedirectInstOutput
|
||||
{
|
||||
public:
|
||||
IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment);
|
||||
~IMLRedirectInstOutput();
|
||||
|
||||
|
||||
private:
|
||||
ppcImlGenContext_t* m_context;
|
||||
IMLSegment* m_prevSegment;
|
||||
};
|
|
@ -87,8 +87,7 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex
|
|||
}
|
||||
|
||||
// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards
|
||||
template<typename F1n, typename F2n>
|
||||
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken)
|
||||
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken)
|
||||
{
|
||||
IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
|
||||
|
||||
|
@ -118,6 +117,100 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P
|
|||
basicBlockInfo.appendSegment = segMerge;
|
||||
}
|
||||
|
||||
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index);
|
||||
|
||||
// Redirects instruction output of the context to outputSegment and remembers the segment that was active before.
// If outputSegment already is the current output segment, nothing is changed and nothing will be restored later.
IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext)
{
	IMLSegment* const activeSegment = ppcImlGenContext->currentOutputSegment;
	// the context's output segment and the basic block's append segment are expected to be in sync
	cemu_assert_debug(ppcImlGenContext->currentOutputSegment == ppcImlGenContext->currentBasicBlock->appendSegment);

	const bool needsRedirect = (outputSegment != activeSegment);
	// a null m_prevSegment tells the destructor that there is nothing to restore
	m_prevSegment = needsRedirect ? activeSegment : nullptr;
	if (!needsRedirect)
		return;

	m_context->currentBasicBlock->appendSegment = outputSegment;
	m_context->currentOutputSegment = outputSegment;
}
|
||||
|
||||
// Restores the output segment that was active before the redirect, if there is one recorded.
IMLRedirectInstOutput::~IMLRedirectInstOutput()
{
	// m_prevSegment is null when nothing has to be restored
	if (!m_prevSegment)
		return;
	m_context->currentBasicBlock->appendSegment = m_prevSegment;
	m_context->currentOutputSegment = m_prevSegment;
}
|
||||
|
||||
|
||||
// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count
|
||||
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count)
|
||||
{
|
||||
IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
|
||||
cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction
|
||||
|
||||
const sint32 numBranchSegments = count;// - 1; If we move the default case to the first segment we could avoid one extra non-conditional branch
|
||||
const sint32 numCaseSegments = count;
|
||||
|
||||
std::span<IMLSegment*> segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1);
|
||||
IMLSegment** extraBranchSegments = segments.data();
|
||||
IMLSegment** caseSegments = segments.data() + numBranchSegments - 1;
|
||||
IMLSegment* mergeSegment = segments[numBranchSegments - 1 + numCaseSegments];
|
||||
|
||||
// move links to the merge segment
|
||||
mergeSegment->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken());
|
||||
mergeSegment->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken());
|
||||
currentWriteSegment->SetLinkBranchTaken(nullptr);
|
||||
currentWriteSegment->SetLinkBranchNotTaken(nullptr);
|
||||
|
||||
for (sint32 i=0; i<count; i++)
|
||||
segmentsOut[i] = caseSegments[i];
|
||||
|
||||
IMLReg tmpBoolReg = _GetRegTemporaryS8(&ppcImlGenContext, 2);
|
||||
|
||||
// the first branch segment is the original current write segment
|
||||
auto GetBranchSegment = [&](sint32 index) {
|
||||
if (index == 0)
|
||||
return currentWriteSegment;
|
||||
else
|
||||
return extraBranchSegments[index - 1];
|
||||
};
|
||||
// link branch segments (taken: Link to case segment. NotTaken: Link to next branch segment. For the last one use a non-conditional jump)
|
||||
for (sint32 i=0; i<numBranchSegments; i++)
|
||||
{
|
||||
IMLSegment* seg = GetBranchSegment(i);
|
||||
if (i < numBranchSegments - 1)
|
||||
{
|
||||
seg->SetLinkBranchTaken(caseSegments[i]);
|
||||
seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1));
|
||||
seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ);
|
||||
seg->AppendInstruction()->make_conditional_jump(tmpBoolReg, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
seg->SetLinkBranchTaken(caseSegments[i]);
|
||||
seg->AppendInstruction()->make_jump();
|
||||
}
|
||||
}
|
||||
// link case segments
|
||||
for (sint32 i=0; i<numCaseSegments; i++)
|
||||
{
|
||||
IMLSegment* seg = caseSegments[i];
|
||||
if (i < numCaseSegments - 1)
|
||||
{
|
||||
seg->SetLinkBranchTaken(mergeSegment);
|
||||
//seg->AppendInstruction()->make_jump(); -> Jumps are added after the instructions
|
||||
}
|
||||
else
|
||||
{
|
||||
// todo - the last segment doesnt need to jump
|
||||
seg->SetLinkBranchTaken(mergeSegment);
|
||||
//seg->AppendInstruction()->make_jump();
|
||||
}
|
||||
}
|
||||
ppcImlGenContext.currentOutputSegment = mergeSegment;
|
||||
basicBlockInfo.appendSegment = mergeSegment;
|
||||
}
|
||||
|
||||
IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat)
|
||||
{
|
||||
auto it = ppcImlGenContext->mappedRegs.find(mappedName);
|
||||
|
@ -212,8 +305,8 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
|
|||
return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index);
|
||||
}
|
||||
|
||||
// get throw-away register. Only valid for the scope of a single translated instruction
|
||||
// be careful to not collide with manually loaded temporary register
|
||||
// get throw-away register
|
||||
// be careful to not collide with other temporary register
|
||||
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
|
||||
{
|
||||
cemu_assert_debug(index < 4);
|
||||
|
@ -1891,23 +1984,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 12: // multiply scalar
|
||||
if (PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext, opcode) == false)
|
||||
case 12: // PS_MULS0
|
||||
if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 13: // multiply scalar
|
||||
if (PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext, opcode) == false)
|
||||
case 13: // PS_MULS1
|
||||
if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 14: // multiply add scalar
|
||||
if (PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext, opcode) == false)
|
||||
case 14: // PS_MADDS0
|
||||
if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 15: // multiply add scalar
|
||||
if (PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext, opcode) == false)
|
||||
case 15: // PS_MADDS1
|
||||
if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2515,12 +2608,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 56:
|
||||
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 57:
|
||||
if (PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2573,12 +2666,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
}
|
||||
break;
|
||||
case 60:
|
||||
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 61:
|
||||
if (PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2688,7 +2781,6 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
}
|
||||
|
||||
// returns false if code flow is not interrupted
|
||||
// continueDefaultPath: Controls if
|
||||
bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget)
|
||||
{
|
||||
hasBranchTarget = false;
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue