PPCRec: Rework FP instructions to use scalar registers

This commit is contained in:
Exzap 2025-04-28 12:58:53 +02:00
parent eacac7f6c4
commit bb5a7ce4ff
13 changed files with 1185 additions and 785 deletions

View file

@ -10,6 +10,18 @@ namespace Espresso
CR_BIT_INDEX_SO = 3,
};
// Data type selector for paired-single quantized (PSQ) loads.
// Values 1 to 3 are not assigned to any format and only exist as placeholders.
// NOTE(review): the numeric values presumably mirror the load type field of the GQR registers - confirm against the Espresso documentation.
enum class PSQ_LOAD_TYPE
{
TYPE_F32 = 0, // NOTE(review): presumably 32-bit float (no quantization) - confirm
TYPE_UNUSED1 = 1,
TYPE_UNUSED2 = 2,
TYPE_UNUSED3 = 3,
TYPE_U8 = 4, // NOTE(review): presumably unsigned 8-bit integer - confirm
TYPE_U16 = 5, // NOTE(review): presumably unsigned 16-bit integer - confirm
TYPE_S8 = 6, // NOTE(review): presumably signed 8-bit integer - confirm
TYPE_S16 = 7, // NOTE(review): presumably signed 16-bit integer - confirm
};
enum class PrimaryOpcode
{
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)

View file

@ -1208,6 +1208,13 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
{
x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
}
else if (name >= PPCREC_NAME_FPR0_NEW && name < (PPCREC_NAME_FPR0_NEW + 64))
{
sint32 regIndex = (name - PPCREC_NAME_FPR0_NEW) / 2;
sint32 pairIndex = (name - PPCREC_NAME_FPR0_NEW) % 2;
x64Gen_movddup_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double));
// todo - use movsd here
}
// temporary FPR names cover [PPCREC_NAME_TEMPORARY_FPR0, PPCREC_NAME_TEMPORARY_FPR0 + 8). Both bounds must hold, with '||' this branch would match every name
else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
{
x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));
@ -1285,6 +1292,12 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
{
x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
}
else if (name >= PPCREC_NAME_FPR0_NEW && name < (PPCREC_NAME_FPR0_NEW + 64))
{
sint32 regIndex = (name - PPCREC_NAME_FPR0_NEW) / 2;
sint32 pairIndex = (name - PPCREC_NAME_FPR0_NEW) % 2;
x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0));
}
else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
{
x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0));

View file

@ -205,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
@ -230,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);

View file

@ -269,7 +269,8 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
uint8 mode = imlInstruction->op_storeLoad.mode;
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 )
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ||
mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0 ) // lazy hack for now. Load only one value for SINGLE_INTO_PS0
{
// load byte swapped single into temporary FPR
if( indexed )
@ -682,6 +683,21 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg)
// FPR op FPR
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT )
{
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR);
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA);
x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr);
return;
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT )
{
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
return;
}
// all other cases operate on two floating-point registers
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
@ -1025,6 +1041,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
{
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE )
{
x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
}
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM )
{
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));

View file

@ -213,6 +213,37 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
}
}
// Emits MOVSD <xmm>, [<memRegister> + <memImmU32>] (SSE2, opcode F2 0F 10 /r with a 32-bit displacement).
// Only RSP and register index 15 (R15) are handled as base register, any other base register ends in assert_dbg().
void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	// low three bits of the xmm register index, positioned in the reg field of the ModRM byte
	const sint32 modrmRegField = (xmmRegister & 7) * 8;
	if (memRegister == X86_REG_RSP)
	{
		// MOVSD <xmm>, [RSP+<imm>]
		x64Gen_writeU8(x64GenContext, 0xF2);
		x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x10);
		x64Gen_writeU8(x64GenContext, 0x84 + modrmRegField); // ModRM: disp32 addressing with SIB byte following
		x64Gen_writeU8(x64GenContext, 0x24); // SIB: base RSP, no index
		x64Gen_writeU32(x64GenContext, memImmU32);
		return;
	}
	if (memRegister == 15)
	{
		// MOVSD <xmm>, [R15+<imm>]
		// NOTE(review): 0x36 is the x86 SS segment override prefix, its purpose here is not visible from this function - confirm
		x64Gen_writeU8(x64GenContext, 0x36);
		x64Gen_writeU8(x64GenContext, 0xF2);
		x64Gen_genSSEVEXPrefix2(x64GenContext, memRegister, xmmRegister, false);
		x64Gen_writeU8(x64GenContext, 0x0F);
		x64Gen_writeU8(x64GenContext, 0x10);
		x64Gen_writeU8(x64GenContext, 0x87 + modrmRegField); // ModRM: disp32 addressing, rm = 7
		x64Gen_writeU32(x64GenContext, memImmU32);
		return;
	}
	// unsupported base register
	assert_dbg();
}
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
// SSE3
@ -561,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis
x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
}
// Emits CVTSI2SD <xmmRegisterDest>, <registerSrc> in register-direct form (SSE2, opcode F2 0F 2A /r).
// xmmRegisterDest is encoded in the ModRM reg field and registerSrc in the rm field.
// NOTE(review): despite the _xmmReg_xmmReg suffix, CVTSI2SD takes a general purpose register as source - confirm callers pass a GPR index for registerSrc.
void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// SSE2
	// ModRM byte: mod = 11 (register-direct), reg = destination, rm = source
	const sint32 modrmByte = 0xC0 + (xmmRegisterDest & 7) * 8 + (registerSrc & 7);
	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2A);
	x64Gen_writeU8(x64GenContext, modrmByte);
}
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
{
// SSE2

View file

@ -189,10 +189,18 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di
{
strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
}
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999))
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 32))
{
strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0);
}
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0_NEW && inst.op_r_name.name < (PPCREC_NAME_FPR0_NEW + 32*2))
{
	// FPR0_NEW names count PS0 and PS1 separately: offset = fprIndex * 2 + pairIndex
	// print the register index (offset / 2) followed by the pair half (offset & 1)
	const auto halfOffset = inst.op_r_name.name - PPCREC_NAME_FPR0_NEW;
	strOutput.addFmt("f{}", halfOffset / 2);
	if (halfOffset & 1)
		strOutput.add(".ps1");
	else
		strOutput.add(".ps0");
}
else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
{
strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0);

View file

@ -239,6 +239,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
registersUsed->readGPR2 = op_storeLoad.registerData;
break;
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0:
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
@ -278,6 +279,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
registersUsed->readGPR3 = op_storeLoad.registerData;
break;
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0:
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
@ -389,6 +391,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readGPR1 = op_fpr_r_r.regA;
registersUsed->readGPR2 = op_fpr_r_r.regR;
}
else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
{
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
registersUsed->readGPR1 = op_fpr_r_r.regA;
}
else
cemu_assert_unimplemented();
}
@ -446,6 +454,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readGPR1 = op_fpr_r.regR;
registersUsed->writtenGPR1 = op_fpr_r.regR;
}
else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE)
{
registersUsed->writtenGPR1 = op_fpr_r.regR;
}
else
cemu_assert_unimplemented();
}

View file

@ -158,10 +158,14 @@ enum
PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
// Conversion (FPR_R_R)
PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
// PS
PPCREC_IML_OP_FPR_SUM0,
PPCREC_IML_OP_FPR_SUM1,
PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
// R_R_R only
@ -276,6 +280,7 @@ enum // IMLName
PPCREC_NAME_R0 = 2000,
PPCREC_NAME_SPR0 = 3000,
PPCREC_NAME_FPR0 = 4000,
PPCREC_NAME_FPR0_NEW = 4800, // similar to FPR0, but counting PS0 and PS1 separate. So fp3.ps1 is at offset 3 * 2 + 1
PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
PPCREC_NAME_XER_CA = 6000, // carry bit from XER
PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
@ -757,6 +762,12 @@ struct IMLInstruction
// load from memory
void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
{
if (registerGQR.IsValid())
{
if ( mode == 0)
__debugbreak();
}
this->type = PPCREC_IML_TYPE_FPR_LOAD;
this->operation = 0;
this->op_storeLoad.registerData = registerDestination;

View file

@ -209,7 +209,9 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32
{
// UGQR 2 to 7 are initialized by the OS and we assume that games won't ever permanently touch those
// todo - hack - replace with more accurate solution
if (gqrIndex == 2)
if (gqrIndex == 0)
gqrValue = 0x00000000;
else if (gqrIndex == 2)
gqrValue = 0x00040004;
else if (gqrIndex == 3)
gqrValue = 0x00050005;

View file

@ -2093,7 +2093,10 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM
cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
if (imlSegment->HasSuffixInstruction())
{
cemu_assert_debug(!currentRange); // currentRange should be NULL?
if (currentRange)
{
cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress);
}
for (auto& remainingRange : activeRanges)
{
cemu_assert_debug(!remainingRange->hasStore);

View file

@ -53,14 +53,10 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
@ -88,3 +84,19 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o
// IML general
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken);
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count);
// Scope guard which temporarily redirects IML instruction output to another segment.
// The constructor switches the context's current output segment (and the append segment of the current basic block) to outputSegment.
// The destructor restores the previous segment. If outputSegment already is the current output segment then nothing is changed or restored.
class IMLRedirectInstOutput
{
public:
	IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment);
	~IMLRedirectInstOutput();

	// non-copyable: a copy would restore the previous segment a second time when it is destroyed
	IMLRedirectInstOutput(const IMLRedirectInstOutput&) = delete;
	IMLRedirectInstOutput& operator=(const IMLRedirectInstOutput&) = delete;

private:
	ppcImlGenContext_t* m_context; // context whose output segment is redirected
	IMLSegment* m_prevSegment; // segment to restore on destruction, nullptr if no redirect took place
};

View file

@ -87,8 +87,7 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex
}
// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards
template<typename F1n, typename F2n>
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken)
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken)
{
IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
@ -118,6 +117,100 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P
basicBlockInfo.appendSegment = segMerge;
}
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index);
// Redirects instruction output of ppcImlGenContext to outputSegment.
// Remembers the previously active segment so the destructor can restore it. When outputSegment is already active, no redirect happens and nothing is remembered.
IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext)
{
	IMLSegment* const activeSegment = ppcImlGenContext->currentOutputSegment;
	// output segment and append segment of the current basic block are expected to be in sync
	cemu_assert_debug(activeSegment == ppcImlGenContext->currentBasicBlock->appendSegment);
	const bool needsRedirect = (outputSegment != activeSegment);
	// nullptr signals the destructor that there is nothing to restore
	m_prevSegment = needsRedirect ? activeSegment : nullptr;
	if (needsRedirect)
	{
		m_context->currentBasicBlock->appendSegment = outputSegment;
		m_context->currentOutputSegment = outputSegment;
	}
}
// Restores the segment which was active before the redirect. Does nothing if the constructor did not redirect.
IMLRedirectInstOutput::~IMLRedirectInstOutput()
{
	if (!m_prevSegment)
		return; // no redirect took place
	m_context->currentOutputSegment = m_prevSegment;
	m_context->currentBasicBlock->appendSegment = m_prevSegment;
}
// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count
// - segmentsOut receives count case segments. Case i is entered when compareReg equals compareValues[i], the last case is entered unconditionally if no comparison matched
// - count must be at least 1 (the default case)
// - all case segments are linked to a common merge segment, but no jump instruction is appended to them here
// - on return, the context's current output segment and the basic block's append segment are the merge segment
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count)
{
	cemu_assert_debug(count >= 1); // with less than one case the segment indexing below would leave the range of inserted segments
	IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
	cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction

	const sint32 numBranchSegments = count;// - 1; If we move the default case to the first segment we could avoid one extra non-conditional branch
	const sint32 numCaseSegments = count;

	// layout of the inserted segments: [numBranchSegments - 1 extra branch segments] [numCaseSegments case segments] [merge segment]
	std::span<IMLSegment*> segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1);
	IMLSegment** extraBranchSegments = segments.data();
	IMLSegment** caseSegments = segments.data() + numBranchSegments - 1;
	IMLSegment* mergeSegment = segments[numBranchSegments - 1 + numCaseSegments];

	// move links to the merge segment
	mergeSegment->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken());
	mergeSegment->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken());
	currentWriteSegment->SetLinkBranchTaken(nullptr);
	currentWriteSegment->SetLinkBranchNotTaken(nullptr);

	for (sint32 i=0; i<count; i++)
		segmentsOut[i] = caseSegments[i];

	IMLReg tmpBoolReg = _GetRegTemporaryS8(&ppcImlGenContext, 2);

	// the first branch segment is the original current write segment
	auto GetBranchSegment = [&](sint32 index) {
		if (index == 0)
			return currentWriteSegment;
		else
			return extraBranchSegments[index - 1];
	};

	// link branch segments (taken: Link to case segment. NotTaken: Link to next branch segment. For the last one use a non-conditional jump)
	for (sint32 i=0; i<numBranchSegments; i++)
	{
		IMLSegment* seg = GetBranchSegment(i);
		if (i < numBranchSegments - 1)
		{
			seg->SetLinkBranchTaken(caseSegments[i]);
			seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1));
			seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ);
			seg->AppendInstruction()->make_conditional_jump(tmpBoolReg, true);
		}
		else
		{
			// default case, entered without comparison
			seg->SetLinkBranchTaken(caseSegments[i]);
			seg->AppendInstruction()->make_jump();
		}
	}

	// link case segments. Every case continues at the merge segment
	// no jump is appended here -> Jumps are added after the instructions
	// todo - the last segment doesn't need to jump
	for (sint32 i=0; i<numCaseSegments; i++)
		caseSegments[i]->SetLinkBranchTaken(mergeSegment);

	ppcImlGenContext.currentOutputSegment = mergeSegment;
	basicBlockInfo.appendSegment = mergeSegment;
}
IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat)
{
auto it = ppcImlGenContext->mappedRegs.find(mappedName);
@ -212,8 +305,8 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index);
}
// get throw-away register. Only valid for the scope of a single translated instruction
// be careful to not collide with manually loaded temporary register
// get throw-away register
// be careful to not collide with other temporary register
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
{
cemu_assert_debug(index < 4);
@ -1891,23 +1984,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 12: // multiply scalar
if (PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext, opcode) == false)
case 12: // PS_MULS0
if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, false) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 13: // multiply scalar
if (PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext, opcode) == false)
case 13: // PS_MULS1
if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, true) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 14: // multiply add scalar
if (PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext, opcode) == false)
case 14: // PS_MADDS0
if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, false) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 15: // multiply add scalar
if (PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext, opcode) == false)
case 15: // PS_MADDS1
if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, true) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@ -2515,12 +2608,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
ppcImlGenContext->hasFPUInstruction = true;
break;
case 56:
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode) == false)
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, false) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 57:
if (PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext, opcode) == false)
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, true) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@ -2573,12 +2666,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
}
break;
case 60:
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode) == false)
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, false) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
case 61:
if (PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext, opcode) == false)
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, true) == false)
unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true;
break;
@ -2688,7 +2781,6 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
}
// returns false if code flow is not interrupted
// continueDefaultPath: Controls if
bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget)
{
hasBranchTarget = false;

File diff suppressed because it is too large Load diff