diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index dc07f9d0..6a8b1b97 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -241,6 +241,25 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr); return; } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT) + { + cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now + cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now + // exact operation depends on size of types. Floats are automatically promoted to double if the target is F64 + uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR); + if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32) + { + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA); + x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. 
We'll refactor later + // float to double + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr); + } + else + { + cemu_assert_unimplemented(); + } + return; + } uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index f736c2a7..cd269869 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -36,6 +36,30 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return "MULS"; else if (op == PPCREC_IML_OP_DIVIDE_SIGNED) return "DIVS"; + else if (op == PPCREC_IML_OP_FPR_ASSIGN) + return "FMOV"; + else if (op == PPCREC_IML_OP_FPR_ADD) + return "FADD"; + else if (op == PPCREC_IML_OP_FPR_SUB) + return "FSUB"; + else if (op == PPCREC_IML_OP_FPR_MULTIPLY) + return "FMUL"; + else if (op == PPCREC_IML_OP_FPR_DIVIDE) + return "FDIV"; + else if (op == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64) + return "F32TOF64"; + else if (op == PPCREC_IML_OP_FPR_ABS) + return "FABS"; + else if (op == PPCREC_IML_OP_FPR_NEGATE) + return "FNEG"; + else if (op == PPCREC_IML_OP_FPR_NEGATIVE_ABS) + return "FNABS"; + else if (op == PPCREC_IML_OP_FPR_FLOAT_TO_INT) + return "F2I"; + else if (op == PPCREC_IML_OP_FPR_INT_TO_FLOAT) + return "I2F"; + else if (op == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT) + return "BITMOVE"; sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type); return _tempOpcodename; @@ -409,19 +433,24 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode); } + else if (inst.type == PPCREC_IML_TYPE_FPR_R) + { + strOutput.addFmt("{:<6} ", 
IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}", IMLDebug_GetRegName(inst.op_fpr_r.regR)); + } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst)); strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA)); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst)); strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC)); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) { - strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst)); strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB)); } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 60b7c6ca..997de4e9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -277,7 +277,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const } else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT || - operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT) + operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT || + operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT) { registersUsed->writtenGPR1 = op_fpr_r_r.regR; registersUsed->readGPR1 = op_fpr_r_r.regA; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 
3b3898e9..4df2a666 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -143,6 +143,9 @@ enum PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr + // Bitcast (FPR_R_R) + PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT, + // R_R_R + R_R_S32 PPCREC_IML_OP_ADD, // also R_R_R_CARRY PPCREC_IML_OP_SUB, diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index d0348e5a..d5693846 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -23,7 +23,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; if (imlInstructionLoad->op_storeLoad.flags2.notExpanded) return; - + boost::container::static_vector trackedMoves; // only track up to 4 copies IMLUsedRegisters registersUsed; sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) bool foundMatch = false; @@ -54,8 +54,24 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI continue; } } - - // check if FPR is overwritten (we can actually ignore read operations?) + // if the FPR is copied then keep track of it. 
We can expand the copies instead of the original + if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex) + { + if (imlInstruction->op_fpr_r_r.regR.GetRegID() == fprIndex) + { + // unexpected no-op + break; + } + if (trackedMoves.size() >= trackedMoves.capacity()) + { + // we can't track any more moves, expand here + lastStore = i; + break; + } + trackedMoves.push_back(i); + continue; + } + // check if FPR is overwritten imlInstruction->CheckRegisterUsage(&registersUsed); if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex)) break; @@ -71,6 +87,24 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI if (foundMatch) { + // insert expand instructions for each target register of a move + sint32 positionBias = 0; + for (auto& trackedMove : trackedMoves) + { + sint32 realPosition = trackedMove + positionBias; + IMLInstruction* imlMoveInstruction = imlSegment->imlList.data() + realPosition; + if (realPosition >= lastStore) + break; // expand is inserted before this move + else + lastStore++; + + cemu_assert_debug(imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex); + cemu_assert_debug(imlMoveInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::F64); + auto dstReg = imlMoveInstruction->op_fpr_r_r.regR; + IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, realPosition+1); // one after the move + newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg); + positionBias++; + } // insert expand instruction after store IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore); newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex)); @@ -90,23 +124,21 @@ void 
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI */ void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) { - cemuLog_logDebugOnce(LogType::Force, "IMLOptimizer_OptimizeDirectFloatCopies(): Currently disabled\n"); - return; - // for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - // { - // for (sint32 i = 0; i < segIt->imlList.size(); i++) - // { - // IMLInstruction* imlInstruction = segIt->imlList.data() + i; - // if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - // { - // PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); - // } - // else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - // { - // PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); - // } - // } - // } + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + { + for (sint32 i = 0; i < segIt->imlList.size(); i++) + { + IMLInstruction* imlInstruction = segIt->imlList.data() + i; + if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE) + { + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE) + { + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + } + } + } } void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp 
b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 0dbc073b..087b90f5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -685,45 +685,6 @@ void PPCRecompiler_init() PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize()); PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize()); - // setup GQR scale tables - - for (uint32 i = 0; i < 32; i++) - { - float a = 1.0f / (float)(1u << i); - float b = 0; - if (i == 0) - b = 4294967296.0f; - else - b = (float)(1u << (32u - i)); - - float ar = (float)(1u << i); - float br = 0; - if (i == 0) - br = 1.0f / 4294967296.0f; - else - br = 1.0f / (float)(1u << (32u - i)); - - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f; - - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b; - - ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar; - ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f; - ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br; - ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f; - - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar; - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar; - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br; - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br; - } - PPCRecompiler_initPlatform(); cemuLog_log(LogType::Force, "Recompiler initialized"); diff 
--git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 706855d4..47902630 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -136,11 +136,6 @@ typedef struct alignas(16) float _x64XMM_constFloatMin[2]; alignas(16) uint32 _x64XMM_flushDenormalMask1[4]; alignas(16) uint32 _x64XMM_flushDenormalMaskResetSignBits[4]; - // PSQ load/store scale tables - double _psq_ld_scale_ps0_ps1[64 * 2]; - double _psq_ld_scale_ps0_1[64 * 2]; - double _psq_st_scale_ps0_ps1[64 * 2]; - double _psq_st_scale_ps0_1[64 * 2]; // MXCSR uint32 _x64XMM_mxCsr_ftzOn; uint32 _x64XMM_mxCsr_ftzOff; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index 7eb8a4b6..6e602b47 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -744,7 +744,21 @@ void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLR ); } -void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) +void PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrRegister, IMLReg fprRegScaleOut, bool isLoad) +{ + IMLReg gprTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + // extract scale factor and sign extend it + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, gprTmp2, gqrRegister, 32 - ((isLoad ? 
24 : 8)+7)); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, gprTmp2, gprTmp2, (32-23)-7); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, gprTmp2, gprTmp2, 0x1FF<<23); + if (isLoad) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, gprTmp2, gprTmp2); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprTmp2, gprTmp2, 0x7F<<23); + // gprTmp2 now holds the scale float bits, bitcast to float + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT, fprRegScaleOut, gprTmp2); +} + +void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) { if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) { @@ -756,26 +770,42 @@ void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, E } if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true); + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 16, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg); + if(readPS1) { ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 2, 16, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + 
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg); } } else if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true); + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 8, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg); if(readPS1) { ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 1, 8, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg); } } } @@ -812,14 +842,15 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); // extract the load type from the GQR register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); IMLSegment* caseSegment[6]; sint32 compareValues[6] = {0, 4, 5, 6, 7}; 
PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0); for (sint32 i=0; i<5; i++) { IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] - PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1); + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1); // create the case jump instructions here because we need to add it last caseSegment[i]->AppendInstruction()->make_jump(); } @@ -839,11 +870,11 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } - PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, type, readPS1, gprA, imm, fprDPS0, fprDPS1); + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, type, readPS1, gprA, imm, fprDPS0, fprDPS1); return true; } -void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) +void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) { cemu_assert_debug(!storePS1 || fprDPS1.IsValid()); if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) @@ -856,10 +887,18 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, } else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, 
gqrRegister, fprScaleReg, false); + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); + IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); - // todo - scaling + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); + if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); else @@ -867,8 +906,8 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 16, true); if(storePS1) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); - // todo - scaling + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); else @@ -878,9 +917,16 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, } else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, false); + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); + IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0); IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); - 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); else @@ -888,8 +934,8 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 8, true); if(storePS1) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); - // todo - scaling + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); else @@ -928,8 +974,7 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); // extract the load type from the GQR register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); IMLSegment* caseSegment[5]; sint32 compareValues[5] = {0, 4, 5, 6, 7}; @@ -937,7 +982,7 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc for (sint32 i=0; i<5; i++) { IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] - 
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1); + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1); ppcImlGenContext->emitInst().make_jump(); // finalize case } return true; @@ -954,7 +999,7 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc return false; } - PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, type, storePS1, gprA, imm, fprDPS0, fprDPS1); + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, type, storePS1, gprA, imm, fprDPS0, fprDPS1); return true; }