diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 0eeeabdb..dc07f9d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -241,10 +241,9 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr); return; } - // all other cases operate on two floating-point registers + uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN ) { x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index e4e9e776..bfb2aed5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -63,8 +63,7 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative); bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); @@ -86,8 +85,8 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken); -void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count); - +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken); // no else segment +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex); class IMLRedirectInstOutput { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index c147a8ff..e76a53fa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -117,6 +117,29 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P basicBlockInfo.appendSegment = segMerge; } +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken) +{ + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 2); + IMLSegment* segBranchNotTaken = segments[0]; + IMLSegment* segMerge = segments[1]; + + // link the segments + segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(segMerge); + currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken); + segBranchNotTaken->SetLinkBranchNotTaken(segMerge); + // generate code for branch not taken segment + ppcImlGenContext.currentOutputSegment = segBranchNotTaken; + genSegmentBranchNotTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken); + // make merge segment the new write segment + ppcImlGenContext.currentOutputSegment = segMerge; + basicBlockInfo.appendSegment = segMerge; +} + IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index); IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext) @@ -141,14 +164,13 @@ IMLRedirectInstOutput::~IMLRedirectInstOutput() } } - // compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count -void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count) +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex) { IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction - const sint32 numBranchSegments = count;// - 1; If we move the default case to the first segment we could avoid one extra non-conditional branch + const sint32 numBranchSegments = count + 1; const sint32 numCaseSegments = count; std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1); @@ -162,7 +184,7 @@ void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenCo currentWriteSegment->SetLinkBranchTaken(nullptr); currentWriteSegment->SetLinkBranchNotTaken(nullptr); - for (sint32 i=0; iSetLinkBranchTaken(caseSegments[i]); seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1)); seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ); @@ -187,7 +210,8 @@ void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenCo } else { - seg->SetLinkBranchTaken(caseSegments[i]); + cemu_assert_debug(defaultCaseIndex < numCaseSegments); + seg->SetLinkBranchTaken(caseSegments[defaultCaseIndex]); seg->AppendInstruction()->make_jump(); } } @@ -198,13 +222,11 @@ void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenCo if (i < numCaseSegments - 1) { seg->SetLinkBranchTaken(mergeSegment); - //seg->AppendInstruction()->make_jump(); -> Jumps are added after the instructions + // -> Jumps are added after the instructions } else { - // todo - the last segment doesnt need to jump seg->SetLinkBranchTaken(mergeSegment); - //seg->AppendInstruction()->make_jump(); } } ppcImlGenContext.currentOutputSegment = mergeSegment; @@ -2069,22 +2091,22 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 28: // multiply sub paired - if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode) == false) + case 28: // PS_MSUB + if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 29: // multiply add paired + case 29: // PS_MADD if (PPCRecompilerImlGen_PS_MADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 30: // negative multiply sub paired - if (PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext, opcode) == false) + case 30: // PS_NMSUB + if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 31: // negative multiply add paired + case 31: // PS_NMADD if (PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index d297ec5c..7eb8a4b6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -39,10 +39,7 @@ IMLReg _GetFPRReg(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex, bool se return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + (selectPS1 ? 1 : 0), IMLRegFormat::F64); } -/* - * Rounds the bottom double to single precision (if single precision accuracy is emulated) - */ -void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) +void PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) { ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); if( flushDenormals ) @@ -414,7 +411,7 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // multiply bottom double of frD with bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); // if paired single mode, copy frD ps0 to ps1 PSE_CopyResultToPs1(); return true; @@ -438,7 +435,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // move result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } @@ -448,7 +445,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // subtract bottom double of frB from bottom double of frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } @@ -474,7 +471,7 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // add bottom double of frD and bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } @@ -488,7 +485,7 @@ bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } @@ -497,10 +494,6 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - //FPRD(RD) = FPRD(RA) * FPRD(RC) + FPRD(RB); - //hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr + hCPU->fpr[frB].fpr; - //if( hCPU->PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); @@ -514,7 +507,7 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprRegisterTemp, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result if( fprD != fprRegisterTemp ) { @@ -528,9 +521,6 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - //hCPU->fpr[frD].fp0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //if( hCPU->PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); @@ -545,7 +535,7 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result if( fprD != fprRegisterTemp ) { @@ -573,46 +563,18 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprRegisterTemp); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result if( fprD != fprRegisterTemp ) - { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); - } PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("FCMPO: Not implemented\n"); + // Not implemented return false; - - //sint32 crfD, frA, frB; - //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - //crfD >>= 2; - //IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - //IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - - //IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); - //IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); - //IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); - //IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); - - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); - - // todo - set fpscr - - //sint32 crfD, frA, frB; - //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - //crfD >>= 2; - //uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - //uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - //ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); - return true; } bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -683,7 +645,7 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } @@ -696,9 +658,7 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod DefinePS0(fprB, frB); DefinePS0(fprD, frD); if( fprD != fprB ) - { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); - } ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprD); PSE_CopyResultToPs1(); return true; @@ -710,15 +670,11 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); if( opcode&PPC_OPC_RC ) - { return false; - } DefinePS0(fprB, frB); DefinePS0(fprD, frD); - // move frB to frD (if different register) if( frD != frB ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); - // negate frD ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); return true; } @@ -747,7 +703,7 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); return true; } @@ -768,30 +724,22 @@ void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLR IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); // min(reg, clampMax) ppcImlGenContext->emitInst().make_compare_s32(reg, clampMax, regTmpCondBool, IMLCondition::SIGNED_GT); - ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false); // condition needs to be inverted because we skip if the condition is true PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, - [&](ppcImlGenContext_t& genCtx) - { - /* branch taken */ - genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax); - }, [&](ppcImlGenContext_t& genCtx) { /* branch not taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax); } ); // max(reg, clampMin) ppcImlGenContext->emitInst().make_compare_s32(reg, clampMin, regTmpCondBool, IMLCondition::SIGNED_LT); - ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false); PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, - [&](ppcImlGenContext_t& genCtx) - { - /* branch taken */ - genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMin); - }, [&](ppcImlGenContext_t& genCtx) { /* branch not taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMin); } ); } @@ -865,9 +813,9 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); // extract the load type from the GQR register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); - IMLSegment* caseSegment[5]; - sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case - PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); + IMLSegment* caseSegment[6]; + sint32 compareValues[6] = {0, 4, 5, 6, 7}; + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0); for (sint32 i=0; i<5; i++) { IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] @@ -984,8 +932,8 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); IMLSegment* caseSegment[5]; - sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case - PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); + sint32 compareValues[5] = {0, 4, 5, 6, 7}; + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0); for (sint32 i=0; i<5; i++) { IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] @@ -1042,8 +990,8 @@ bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 o ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1087,8 +1035,8 @@ bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1126,8 +1074,8 @@ bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1151,8 +1099,8 @@ bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprAps1, fprBps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1191,8 +1139,8 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1231,8 +1179,8 @@ bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1255,35 +1203,52 @@ bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 op DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); - DefineTempFPR(fprTemp0, 0); - DefineTempFPR(fprTemp1, 1); - - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction - if( frD == frA && frD != frB ) + if (frD != frA && frD != frB) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + if (frD == frC) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1); + } ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); - // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); + if( frD == frA && frD != frB ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); + } + else + { + // we multiply temporary by frA + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + // add frB + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); + // copy result to frD + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + } } // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1341,55 +1306,8 @@ bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 o return true; } -bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frB, frC; - frC = (opcode>>6)&0x1F; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = (hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 - hCPU->fpr[frB].fp1); - - DefinePS0(fprDps0, frD); - DefinePS1(fprDps1, frD); - DefinePS0(fprAps0, frA); - DefinePS1(fprAps1, frA); - DefinePS0(fprBps0, frB); - DefinePS1(fprBps1, frB); - DefinePS0(fprCps0, frC); - DefinePS1(fprCps1, frC); - - DefineTempFPR(fprTemp0, 0); - DefineTempFPR(fprTemp1, 1); - - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); - // todo: This instruction can be optimized so that it doesn't always use a temporary register - if( frD == frA && frD != frB ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); - } - else - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); - } - - // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); - return true; -} - -bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MSUB and PS_NMSUB +bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; @@ -1406,34 +1324,55 @@ bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 o DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); - DefineTempFPR(fprTemp0, 0); - DefineTempFPR(fprTemp1, 1); - - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); - // todo: This instruction can be optimized so that it doesn't always use a temporary register - if( frD == frA && frD != frB ) + if (frD != frA && frD != frB) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + if (frD == frC) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1); + } ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); } else { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); + if( frD == frA && frD != frB ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + } } // negate result - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); + if (withNegative) + { + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); + } // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1444,10 +1383,6 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //float s1 = (float)hCPU->fpr[frC].fp1; - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); @@ -1467,8 +1402,8 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op if (fprDps1 != fprCps1) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprCps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1479,10 +1414,6 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)hCPU->fpr[frC].fp0; - //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); @@ -1490,21 +1421,19 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op DefinePS1(fprBps1, frB); DefinePS0(fprCps0, frC); - // todo - avoid temporaries when possible + if (frB != frD) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); + } + else + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprAps0); - DefineTempFPR(fprTemp0, 0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); - - DefineTempFPR(fprTemp1, 1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprAps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); - - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprCps0); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1565,8 +1494,8 @@ bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opc ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1584,8 +1513,8 @@ bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1674,12 +1603,18 @@ bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 DefinePS0(frpDps0, frD); DefinePS1(frpDps1, frD); - DefineTempFPR(frpTemp, 0); - - // todo - optimize cases where a temporary is not necessary - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpTemp, frpBps0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpTemp); + if (frD != frB) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps0); + } + else + { + DefineTempFPR(frpTemp, 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpTemp, frpBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpTemp); + } return true; } @@ -1702,20 +1637,8 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("PS_CMPO0: Not implemented\n"); + // Not implemented return false; -/* - sint32 crfD, frA, frB; - uint32 c=0; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - crfD = (opcode>>23)&0x7; - - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); - return true; -*/ } bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1724,11 +1647,7 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - // DefinePS1(frpAps0, frA); - // DefinePS1(frpBps0, frB); - // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPU_BOTTOM, frpAps0, frpBps0, crfD); - //crfD >>= 2; DefinePS0(fprA, frA); DefinePS0(fprB, frB); @@ -1766,4 +1685,4 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); return true; -} \ No newline at end of file +}