mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-02 21:11:17 +12:00
Compare commits
2 commits
de542410c2
...
ba09daf328
Author | SHA1 | Date | |
---|---|---|---|
|
ba09daf328 | ||
|
557aff4024 |
8 changed files with 169 additions and 84 deletions
|
@ -241,6 +241,25 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
|
|||
x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
|
||||
return;
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
|
||||
{
|
||||
cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
|
||||
cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now
|
||||
// exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
|
||||
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
|
||||
if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32)
|
||||
{
|
||||
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
|
||||
x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. We'll refactor later
|
||||
// float to double
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr);
|
||||
}
|
||||
else
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
|
||||
|
|
|
@ -36,6 +36,30 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
|
|||
return "MULS";
|
||||
else if (op == PPCREC_IML_OP_DIVIDE_SIGNED)
|
||||
return "DIVS";
|
||||
else if (op == PPCREC_IML_OP_FPR_ASSIGN)
|
||||
return "FMOV";
|
||||
else if (op == PPCREC_IML_OP_FPR_ADD)
|
||||
return "FADD";
|
||||
else if (op == PPCREC_IML_OP_FPR_SUB)
|
||||
return "FSUB";
|
||||
else if (op == PPCREC_IML_OP_FPR_MULTIPLY)
|
||||
return "FMUL";
|
||||
else if (op == PPCREC_IML_OP_FPR_DIVIDE)
|
||||
return "FDIV";
|
||||
else if (op == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
|
||||
return "F32TOF64";
|
||||
else if (op == PPCREC_IML_OP_FPR_ABS)
|
||||
return "FABS";
|
||||
else if (op == PPCREC_IML_OP_FPR_NEGATE)
|
||||
return "FNEG";
|
||||
else if (op == PPCREC_IML_OP_FPR_NEGATIVE_ABS)
|
||||
return "FNABS";
|
||||
else if (op == PPCREC_IML_OP_FPR_FLOAT_TO_INT)
|
||||
return "F2I";
|
||||
else if (op == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
|
||||
return "I2F";
|
||||
else if (op == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
|
||||
return "BITMOVE";
|
||||
|
||||
sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type);
|
||||
return _tempOpcodename;
|
||||
|
@ -409,19 +433,24 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di
|
|||
strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32);
|
||||
strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode);
|
||||
}
|
||||
else if (inst.type == PPCREC_IML_TYPE_FPR_R)
|
||||
{
|
||||
strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
|
||||
strOutput.addFmt("{}", IMLDebug_GetRegName(inst.op_fpr_r.regR));
|
||||
}
|
||||
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R)
|
||||
{
|
||||
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
|
||||
strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
|
||||
strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA));
|
||||
}
|
||||
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R)
|
||||
{
|
||||
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
|
||||
strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
|
||||
strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC));
|
||||
}
|
||||
else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R)
|
||||
{
|
||||
strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst));
|
||||
strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst));
|
||||
strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB));
|
||||
}
|
||||
else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK)
|
||||
|
|
|
@ -277,7 +277,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
|
||||
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
|
||||
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ||
|
||||
operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
|
||||
{
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
|
|
|
@ -143,6 +143,9 @@ enum
|
|||
PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
|
||||
PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
|
||||
|
||||
// Bitcast (FPR_R_R)
|
||||
PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT,
|
||||
|
||||
// R_R_R + R_R_S32
|
||||
PPCREC_IML_OP_ADD, // also R_R_R_CARRY
|
||||
PPCREC_IML_OP_SUB,
|
||||
|
|
|
@ -23,7 +23,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad;
|
||||
if (imlInstructionLoad->op_storeLoad.flags2.notExpanded)
|
||||
return;
|
||||
|
||||
boost::container::static_vector<sint32, 4> trackedMoves; // only track up to 4 copies
|
||||
IMLUsedRegisters registersUsed;
|
||||
sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances)
|
||||
bool foundMatch = false;
|
||||
|
@ -54,8 +54,24 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// check if FPR is overwritten (we can actually ignore read operations?)
|
||||
// if the FPR is copied then keep track of it. We can expand the copies instead of the original
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex)
|
||||
{
|
||||
if (imlInstruction->op_fpr_r_r.regR.GetRegID() == fprIndex)
|
||||
{
|
||||
// unexpected no-op
|
||||
break;
|
||||
}
|
||||
if (trackedMoves.size() >= trackedMoves.capacity())
|
||||
{
|
||||
// we cant track any more moves, expand here
|
||||
lastStore = i;
|
||||
break;
|
||||
}
|
||||
trackedMoves.push_back(i);
|
||||
continue;
|
||||
}
|
||||
// check if FPR is overwritten
|
||||
imlInstruction->CheckRegisterUsage(&registersUsed);
|
||||
if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex))
|
||||
break;
|
||||
|
@ -71,6 +87,24 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
|
||||
if (foundMatch)
|
||||
{
|
||||
// insert expand instructions for each target register of a move
|
||||
sint32 positionBias = 0;
|
||||
for (auto& trackedMove : trackedMoves)
|
||||
{
|
||||
sint32 realPosition = trackedMove + positionBias;
|
||||
IMLInstruction* imlMoveInstruction = imlSegment->imlList.data() + realPosition;
|
||||
if (realPosition >= lastStore)
|
||||
break; // expand is inserted before this move
|
||||
else
|
||||
lastStore++;
|
||||
|
||||
cemu_assert_debug(imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex);
|
||||
cemu_assert_debug(imlMoveInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::F64);
|
||||
auto dstReg = imlMoveInstruction->op_fpr_r_r.regR;
|
||||
IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, realPosition+1); // one after the move
|
||||
newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg);
|
||||
positionBias++;
|
||||
}
|
||||
// insert expand instruction after store
|
||||
IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore);
|
||||
newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex));
|
||||
|
@ -90,23 +124,21 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
*/
|
||||
void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
cemuLog_logDebugOnce(LogType::Force, "IMLOptimizer_OptimizeDirectFloatCopies(): Currently disabled\n");
|
||||
return;
|
||||
// for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
// {
|
||||
// for (sint32 i = 0; i < segIt->imlList.size(); i++)
|
||||
// {
|
||||
// IMLInstruction* imlInstruction = segIt->imlList.data() + i;
|
||||
// if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
// {
|
||||
// PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
// }
|
||||
// else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
// {
|
||||
// PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
for (sint32 i = 0; i < segIt->imlList.size(); i++)
|
||||
{
|
||||
IMLInstruction* imlInstruction = segIt->imlList.data() + i;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
|
||||
|
|
|
@ -685,45 +685,6 @@ void PPCRecompiler_init()
|
|||
PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
|
||||
PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());
|
||||
|
||||
// setup GQR scale tables
|
||||
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
{
|
||||
float a = 1.0f / (float)(1u << i);
|
||||
float b = 0;
|
||||
if (i == 0)
|
||||
b = 4294967296.0f;
|
||||
else
|
||||
b = (float)(1u << (32u - i));
|
||||
|
||||
float ar = (float)(1u << i);
|
||||
float br = 0;
|
||||
if (i == 0)
|
||||
br = 1.0f / 4294967296.0f;
|
||||
else
|
||||
br = 1.0f / (float)(1u << (32u - i));
|
||||
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
|
||||
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b;
|
||||
ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b;
|
||||
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f;
|
||||
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br;
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br;
|
||||
}
|
||||
|
||||
PPCRecompiler_initPlatform();
|
||||
|
||||
cemuLog_log(LogType::Force, "Recompiler initialized");
|
||||
|
|
|
@ -136,11 +136,6 @@ typedef struct
|
|||
alignas(16) float _x64XMM_constFloatMin[2];
|
||||
alignas(16) uint32 _x64XMM_flushDenormalMask1[4];
|
||||
alignas(16) uint32 _x64XMM_flushDenormalMaskResetSignBits[4];
|
||||
// PSQ load/store scale tables
|
||||
double _psq_ld_scale_ps0_ps1[64 * 2];
|
||||
double _psq_ld_scale_ps0_1[64 * 2];
|
||||
double _psq_st_scale_ps0_ps1[64 * 2];
|
||||
double _psq_st_scale_ps0_1[64 * 2];
|
||||
// MXCSR
|
||||
uint32 _x64XMM_mxCsr_ftzOn;
|
||||
uint32 _x64XMM_mxCsr_ftzOff;
|
||||
|
|
|
@ -744,7 +744,21 @@ void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLR
|
|||
);
|
||||
}
|
||||
|
||||
void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
|
||||
void PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrRegister, IMLReg fprRegScaleOut, bool isLoad)
|
||||
{
|
||||
IMLReg gprTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2);
|
||||
// extract scale factor and sign extend it
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, gprTmp2, gqrRegister, 32 - ((isLoad ? 24 : 8)+7));
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, gprTmp2, gprTmp2, (32-23)-7);
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, gprTmp2, gprTmp2, 0x1FF<<23);
|
||||
if (isLoad)
|
||||
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, gprTmp2, gprTmp2);
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprTmp2, gprTmp2, 0x7F<<23);
|
||||
// gprTmp2 now holds the scale float bits, bitcast to float
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT, fprRegScaleOut, gprTmp2);
|
||||
}
|
||||
|
||||
void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
|
||||
{
|
||||
if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_F32)
|
||||
{
|
||||
|
@ -756,26 +770,42 @@ void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, E
|
|||
}
|
||||
if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16)
|
||||
{
|
||||
// get scale factor
|
||||
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
|
||||
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
|
||||
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true);
|
||||
|
||||
bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16);
|
||||
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
|
||||
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 16, isSigned, true);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp);
|
||||
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg);
|
||||
|
||||
if(readPS1)
|
||||
{
|
||||
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 2, 16, isSigned, true);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg);
|
||||
}
|
||||
}
|
||||
else if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8)
|
||||
{
|
||||
// get scale factor
|
||||
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
|
||||
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
|
||||
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true);
|
||||
|
||||
bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8);
|
||||
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
|
||||
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 8, isSigned, true);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg);
|
||||
if(readPS1)
|
||||
{
|
||||
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 1, 8, isSigned, true);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -812,14 +842,15 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
|
||||
IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
|
||||
// extract the load type from the GQR register
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7);
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16);
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7);
|
||||
IMLSegment* caseSegment[6];
|
||||
sint32 compareValues[6] = {0, 4, 5, 6, 7};
|
||||
PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0);
|
||||
for (sint32 i=0; i<5; i++)
|
||||
{
|
||||
IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i]
|
||||
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
// create the case jump instructions here because we need to add it last
|
||||
caseSegment[i]->AppendInstruction()->make_jump();
|
||||
}
|
||||
|
@ -839,11 +870,11 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
return false;
|
||||
}
|
||||
|
||||
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, type, readPS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, type, readPS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
return true;
|
||||
}
|
||||
|
||||
void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
|
||||
void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
|
||||
{
|
||||
cemu_assert_debug(!storePS1 || fprDPS1.IsValid());
|
||||
if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_F32)
|
||||
|
@ -856,10 +887,18 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext,
|
|||
}
|
||||
else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16)
|
||||
{
|
||||
// get scale factor
|
||||
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
|
||||
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
|
||||
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, false);
|
||||
|
||||
bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16);
|
||||
IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0);
|
||||
|
||||
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0);
|
||||
// todo - scaling
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
|
||||
|
||||
if (isSigned)
|
||||
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767);
|
||||
else
|
||||
|
@ -867,8 +906,8 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext,
|
|||
ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 16, true);
|
||||
if(storePS1)
|
||||
{
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1);
|
||||
// todo - scaling
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
|
||||
if (isSigned)
|
||||
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767);
|
||||
else
|
||||
|
@ -878,9 +917,16 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext,
|
|||
}
|
||||
else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8)
|
||||
{
|
||||
// get scale factor
|
||||
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
|
||||
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
|
||||
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, false);
|
||||
|
||||
bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8);
|
||||
IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0);
|
||||
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
|
||||
if (isSigned)
|
||||
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127);
|
||||
else
|
||||
|
@ -888,8 +934,8 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext,
|
|||
ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 8, true);
|
||||
if(storePS1)
|
||||
{
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1);
|
||||
// todo - scaling
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
|
||||
if (isSigned)
|
||||
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127);
|
||||
else
|
||||
|
@ -928,8 +974,7 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
|
|||
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
|
||||
IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
|
||||
// extract the load type from the GQR register
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16);
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7);
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7);
|
||||
|
||||
IMLSegment* caseSegment[5];
|
||||
sint32 compareValues[5] = {0, 4, 5, 6, 7};
|
||||
|
@ -937,7 +982,7 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
|
|||
for (sint32 i=0; i<5; i++)
|
||||
{
|
||||
IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i]
|
||||
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
ppcImlGenContext->emitInst().make_jump(); // finalize case
|
||||
}
|
||||
return true;
|
||||
|
@ -954,7 +999,7 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
|
|||
return false;
|
||||
}
|
||||
|
||||
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, type, storePS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, type, storePS1, gprA, imm, fprDPS0, fprDPS1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue