PPCRec: Small optimizations and tweaks

This commit is contained in:
Exzap 2025-05-07 20:34:11 +02:00
parent 5fd0d9b4ed
commit 36ac5ef5a9
4 changed files with 183 additions and 244 deletions

View file

@ -241,10 +241,9 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr); x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
return; return;
} }
// all other cases operate on two floating-point registers
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA); uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN ) if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN )
{ {
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);

View file

@ -63,8 +63,7 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative);
bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
@ -86,8 +85,8 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken); void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken);
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count); void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken); // no else segment
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex);
class IMLRedirectInstOutput class IMLRedirectInstOutput
{ {

View file

@ -117,6 +117,29 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P
basicBlockInfo.appendSegment = segMerge; basicBlockInfo.appendSegment = segMerge;
} }
// Variant of PPCIMLGen_CreateSegmentBranchedPath that only generates a "branch not taken" path (no else segment).
// Requests two new segments at the index following the current append segment:
//   [0] receives the instructions emitted by genSegmentBranchNotTaken
//   [1] is the merge segment; it takes over the outgoing links of the current segment and
//       becomes both the current output segment and the basic block's append segment.
// The callback must leave ppcImlGenContext.currentOutputSegment pointing at segment [0] (checked in debug builds).
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken)
{
	IMLSegment* const originSeg = basicBlockInfo.GetSegmentForInstructionAppend();

	const auto insertIndex = ppcImlGenContext.GetSegmentIndex(originSeg) + 1;
	std::span<IMLSegment*> newSegs = ppcImlGenContext.InsertSegments(insertIndex, 2);
	IMLSegment* const fallthroughSeg = newSegs[0];
	IMLSegment* const joinSeg = newSegs[1];

	// the merge segment inherits the original outgoing links; this has to happen
	// before the origin segment's links are overwritten below
	joinSeg->SetLinkBranchTaken(originSeg->GetBranchTaken());
	joinSeg->SetLinkBranchNotTaken(originSeg->GetBranchNotTaken());

	// origin: taken -> merge, not taken -> fallthrough segment, which in turn continues into merge
	originSeg->SetLinkBranchTaken(joinSeg);
	originSeg->SetLinkBranchNotTaken(fallthroughSeg);
	fallthroughSeg->SetLinkBranchNotTaken(joinSeg);

	// let the callback emit its instructions into the fallthrough segment
	ppcImlGenContext.currentOutputSegment = fallthroughSeg;
	genSegmentBranchNotTaken(ppcImlGenContext);
	cemu_assert_debug(ppcImlGenContext.currentOutputSegment == fallthroughSeg);

	// subsequent instructions are written to the merge segment
	ppcImlGenContext.currentOutputSegment = joinSeg;
	basicBlockInfo.appendSegment = joinSeg;
}
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index); IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index);
IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext) IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext)
@ -141,14 +164,13 @@ IMLRedirectInstOutput::~IMLRedirectInstOutput()
} }
} }
// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count // compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count) void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex)
{ {
IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction
const sint32 numBranchSegments = count;// - 1; If we move the default case to the first segment we could avoid one extra non-conditional branch const sint32 numBranchSegments = count + 1;
const sint32 numCaseSegments = count; const sint32 numCaseSegments = count;
std::span<IMLSegment*> segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1); std::span<IMLSegment*> segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1);
@ -162,7 +184,7 @@ void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenCo
currentWriteSegment->SetLinkBranchTaken(nullptr); currentWriteSegment->SetLinkBranchTaken(nullptr);
currentWriteSegment->SetLinkBranchNotTaken(nullptr); currentWriteSegment->SetLinkBranchNotTaken(nullptr);
for (sint32 i=0; i<count; i++) for (sint32 i=0; i<numCaseSegments; i++)
segmentsOut[i] = caseSegments[i]; segmentsOut[i] = caseSegments[i];
IMLReg tmpBoolReg = _GetRegTemporaryS8(&ppcImlGenContext, 2); IMLReg tmpBoolReg = _GetRegTemporaryS8(&ppcImlGenContext, 2);
@ -180,6 +202,7 @@ void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenCo
IMLSegment* seg = GetBranchSegment(i); IMLSegment* seg = GetBranchSegment(i);
if (i < numBranchSegments - 1) if (i < numBranchSegments - 1)
{ {
cemu_assert_debug(i < numCaseSegments);
seg->SetLinkBranchTaken(caseSegments[i]); seg->SetLinkBranchTaken(caseSegments[i]);
seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1)); seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1));
seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ); seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ);
@ -187,7 +210,8 @@ void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenCo
} }
else else
{ {
seg->SetLinkBranchTaken(caseSegments[i]); cemu_assert_debug(defaultCaseIndex < numCaseSegments);
seg->SetLinkBranchTaken(caseSegments[defaultCaseIndex]);
seg->AppendInstruction()->make_jump(); seg->AppendInstruction()->make_jump();
} }
} }
@ -198,13 +222,11 @@ void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenCo
if (i < numCaseSegments - 1) if (i < numCaseSegments - 1)
{ {
seg->SetLinkBranchTaken(mergeSegment); seg->SetLinkBranchTaken(mergeSegment);
//seg->AppendInstruction()->make_jump(); -> Jumps are added after the instructions // -> Jumps are added after the instructions
} }
else else
{ {
// todo - the last segment doesnt need to jump
seg->SetLinkBranchTaken(mergeSegment); seg->SetLinkBranchTaken(mergeSegment);
//seg->AppendInstruction()->make_jump();
} }
} }
ppcImlGenContext.currentOutputSegment = mergeSegment; ppcImlGenContext.currentOutputSegment = mergeSegment;
@ -2069,22 +2091,22 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true; ppcImlGenContext->hasFPUInstruction = true;
break; break;
case 28: // multiply sub paired case 28: // PS_MSUB
if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, false) == false)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true; ppcImlGenContext->hasFPUInstruction = true;
break; break;
case 29: // multiply add paired case 29: // PS_MADD
if (PPCRecompilerImlGen_PS_MADD(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_PS_MADD(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true; ppcImlGenContext->hasFPUInstruction = true;
break; break;
case 30: // negative multiply sub paired case 30: // PS_NMSUB
if (PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, true) == false)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true; ppcImlGenContext->hasFPUInstruction = true;
break; break;
case 31: // negative multiply add paired case 31: // PS_NMADD
if (PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
ppcImlGenContext->hasFPUInstruction = true; ppcImlGenContext->hasFPUInstruction = true;

View file

@ -39,10 +39,7 @@ IMLReg _GetFPRReg(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex, bool se
return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + (selectPS1 ? 1 : 0), IMLRegFormat::F64); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + (selectPS1 ? 1 : 0), IMLRegFormat::F64);
} }
/* void PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false)
* Rounds the bottom double to single precision (if single precision accuracy is emulated)
*/
void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false)
{ {
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister);
if( flushDenormals ) if( flushDenormals )
@ -414,7 +411,7 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// multiply bottom double of frD with bottom double of frB // multiply bottom double of frD with bottom double of frB
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD);
// if paired single mode, copy frD ps0 to ps1 // if paired single mode, copy frD ps0 to ps1
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
@ -438,7 +435,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// move result to frD // move result to frD
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD);
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
} }
@ -448,7 +445,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// subtract bottom double of frB from bottom double of frD // subtract bottom double of frB from bottom double of frD
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD);
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
} }
@ -474,7 +471,7 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// add bottom double of frD and bottom double of frB // add bottom double of frD and bottom double of frB
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD);
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
} }
@ -488,7 +485,7 @@ bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
DefinePS0(fprB, frB); DefinePS0(fprB, frB);
DefinePS0(fprD, frD); DefinePS0(fprD, frD);
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD);
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
} }
@ -497,10 +494,6 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
{ {
sint32 frD, frA, frB, frC; sint32 frD, frA, frB, frC;
PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC);
//FPRD(RD) = FPRD(RA) * FPRD(RC) + FPRD(RB);
//hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr + hCPU->fpr[frB].fpr;
//if( hCPU->PSE )
// hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
DefinePS0(fprA, frA); DefinePS0(fprA, frA);
DefinePS0(fprB, frB); DefinePS0(fprB, frB);
DefinePS0(fprC, frC); DefinePS0(fprC, frC);
@ -514,7 +507,7 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprRegisterTemp, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprRegisterTemp, fprB);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp);
// set result // set result
if( fprD != fprRegisterTemp ) if( fprD != fprRegisterTemp )
{ {
@ -528,9 +521,6 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
{ {
sint32 frD, frA, frB, frC; sint32 frD, frA, frB, frC;
PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC);
//hCPU->fpr[frD].fp0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0);
//if( hCPU->PSE )
// hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;
DefinePS0(fprA, frA); DefinePS0(fprA, frA);
DefinePS0(fprB, frB); DefinePS0(fprB, frB);
DefinePS0(fprC, frC); DefinePS0(fprC, frC);
@ -545,7 +535,7 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp);
// set result // set result
if( fprD != fprRegisterTemp ) if( fprD != fprRegisterTemp )
{ {
@ -573,46 +563,18 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB);
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprRegisterTemp); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprRegisterTemp);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp);
// set result // set result
if( fprD != fprRegisterTemp ) if( fprD != fprRegisterTemp )
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp);
}
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
} }
bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{ {
printf("FCMPO: Not implemented\n"); // Not implemented
return false; return false;
//sint32 crfD, frA, frB;
//PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
//crfD >>= 2;
//IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA);
//IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB);
//IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT);
//IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT);
//IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ);
//IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ);
//ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U);
// todo - set fpscr
//sint32 crfD, frA, frB;
//PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
//crfD >>= 2;
//uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA);
//uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
//ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD);
return true;
} }
bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -683,7 +645,7 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
DefinePS0(fprD, frD); DefinePS0(fprD, frD);
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD);
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
} }
@ -696,9 +658,7 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
DefinePS0(fprB, frB); DefinePS0(fprB, frB);
DefinePS0(fprD, frD); DefinePS0(fprD, frD);
if( fprD != fprB ) if( fprD != fprB )
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB);
}
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprD); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprD);
PSE_CopyResultToPs1(); PSE_CopyResultToPs1();
return true; return true;
@ -710,15 +670,11 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_OPC_TEMPL_X(opcode, frD, frA, frB);
PPC_ASSERT(frA==0); PPC_ASSERT(frA==0);
if( opcode&PPC_OPC_RC ) if( opcode&PPC_OPC_RC )
{
return false; return false;
}
DefinePS0(fprB, frB); DefinePS0(fprB, frB);
DefinePS0(fprD, frD); DefinePS0(fprD, frD);
// move frB to frD (if different register)
if( frD != frB ) if( frD != frB )
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB);
// negate frD
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD);
return true; return true;
} }
@ -747,7 +703,7 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op
DefinePS0(fprD, frD); DefinePS0(fprD, frD);
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD);
return true; return true;
} }
@ -768,30 +724,22 @@ void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLR
IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
// min(reg, clampMax) // min(reg, clampMax)
ppcImlGenContext->emitInst().make_compare_s32(reg, clampMax, regTmpCondBool, IMLCondition::SIGNED_GT); ppcImlGenContext->emitInst().make_compare_s32(reg, clampMax, regTmpCondBool, IMLCondition::SIGNED_GT);
ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false); // condition needs to be inverted because we skip if the condition is true
PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock,
[&](ppcImlGenContext_t& genCtx)
{
/* branch taken */
genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax);
},
[&](ppcImlGenContext_t& genCtx) [&](ppcImlGenContext_t& genCtx)
{ {
/* branch not taken */ /* branch not taken */
genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax);
} }
); );
// max(reg, clampMin) // max(reg, clampMin)
ppcImlGenContext->emitInst().make_compare_s32(reg, clampMin, regTmpCondBool, IMLCondition::SIGNED_LT); ppcImlGenContext->emitInst().make_compare_s32(reg, clampMin, regTmpCondBool, IMLCondition::SIGNED_LT);
ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false);
PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock,
[&](ppcImlGenContext_t& genCtx)
{
/* branch taken */
genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMin);
},
[&](ppcImlGenContext_t& genCtx) [&](ppcImlGenContext_t& genCtx)
{ {
/* branch not taken */ /* branch not taken */
genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMin);
} }
); );
} }
@ -865,9 +813,9 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
// extract the load type from the GQR register // extract the load type from the GQR register
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7);
IMLSegment* caseSegment[5]; IMLSegment* caseSegment[6];
sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case sint32 compareValues[6] = {0, 4, 5, 6, 7};
PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0);
for (sint32 i=0; i<5; i++) for (sint32 i=0; i<5; i++)
{ {
IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i]
@ -984,8 +932,8 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7);
IMLSegment* caseSegment[5]; IMLSegment* caseSegment[5];
sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case sint32 compareValues[5] = {0, 4, 5, 6, 7};
PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0);
for (sint32 i=0; i<5; i++) for (sint32 i=0; i<5; i++)
{ {
IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i]
@ -1042,8 +990,8 @@ bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 o
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1087,8 +1035,8 @@ bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1126,8 +1074,8 @@ bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1);
} }
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1151,8 +1099,8 @@ bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprAps1, fprBps1); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprAps1, fprBps1);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1191,8 +1139,8 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1);
} }
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1231,8 +1179,8 @@ bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1);
} }
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1255,13 +1203,29 @@ bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 op
DefinePS0(fprCps0, frC); DefinePS0(fprCps0, frC);
DefinePS1(fprCps1, frC); DefinePS1(fprCps1, frC);
if (frD != frA && frD != frB)
{
if (frD == frC)
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1);
}
else
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1);
}
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1);
}
else
{
DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp0, 0);
DefineTempFPR(fprTemp1, 1); DefineTempFPR(fprTemp1, 1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1);
// todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register
// if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction
if( frD == frA && frD != frB ) if( frD == frA && frD != frB )
{ {
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0);
@ -1281,9 +1245,10 @@ bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 op
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1);
} }
}
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1341,55 +1306,8 @@ bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 o
return true; return true;
} }
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) // PS_MSUB and PS_NMSUB
{ bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative)
sint32 frD, frA, frB, frC;
frC = (opcode>>6)&0x1F;
frB = (opcode>>11)&0x1F;
frA = (opcode>>16)&0x1F;
frD = (opcode>>21)&0x1F;
//hCPU->fpr[frD].fp0 = (hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0);
//hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 - hCPU->fpr[frB].fp1);
DefinePS0(fprDps0, frD);
DefinePS1(fprDps1, frD);
DefinePS0(fprAps0, frA);
DefinePS1(fprAps1, frA);
DefinePS0(fprBps0, frB);
DefinePS1(fprBps1, frB);
DefinePS0(fprCps0, frC);
DefinePS1(fprCps1, frC);
DefineTempFPR(fprTemp0, 0);
DefineTempFPR(fprTemp1, 1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1);
// todo: This instruction can be optimized so that it doesn't always use a temporary register
if( frD == frA && frD != frB )
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1);
}
else
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1);
}
// adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1);
return true;
}
bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{ {
sint32 frD, frA, frB, frC; sint32 frD, frA, frB, frC;
frC = (opcode>>6)&0x1F; frC = (opcode>>6)&0x1F;
@ -1406,12 +1324,29 @@ bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 o
DefinePS0(fprCps0, frC); DefinePS0(fprCps0, frC);
DefinePS1(fprCps1, frC); DefinePS1(fprCps1, frC);
if (frD != frA && frD != frB)
{
if (frD == frC)
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1);
}
else
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1);
}
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1);
}
else
{
DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp0, 0);
DefineTempFPR(fprTemp1, 1); DefineTempFPR(fprTemp1, 1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1);
// todo: This instruction can be optimized so that it doesn't always use a temporary register
if( frD == frA && frD != frB ) if( frD == frA && frD != frB )
{ {
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0);
@ -1428,12 +1363,16 @@ bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 o
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1);
} }
}
// negate result // negate result
if (withNegative)
{
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0);
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1);
}
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1444,10 +1383,6 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op
frB = (opcode>>11)&0x1F; frB = (opcode>>11)&0x1F;
frA = (opcode>>16)&0x1F; frA = (opcode>>16)&0x1F;
frD = (opcode>>21)&0x1F; frD = (opcode>>21)&0x1F;
//float s0 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1);
//float s1 = (float)hCPU->fpr[frC].fp1;
//hCPU->fpr[frD].fp0 = s0;
//hCPU->fpr[frD].fp1 = s1;
DefinePS0(fprDps0, frD); DefinePS0(fprDps0, frD);
DefinePS1(fprDps1, frD); DefinePS1(fprDps1, frD);
@ -1467,8 +1402,8 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op
if (fprDps1 != fprCps1) if (fprDps1 != fprCps1)
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprCps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprCps1);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1479,10 +1414,6 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op
frB = (opcode>>11)&0x1F; frB = (opcode>>11)&0x1F;
frA = (opcode>>16)&0x1F; frA = (opcode>>16)&0x1F;
frD = (opcode>>21)&0x1F; frD = (opcode>>21)&0x1F;
//float s0 = (float)hCPU->fpr[frC].fp0;
//float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1);
//hCPU->fpr[frD].fp0 = s0;
//hCPU->fpr[frD].fp1 = s1;
DefinePS0(fprDps0, frD); DefinePS0(fprDps0, frD);
DefinePS1(fprDps1, frD); DefinePS1(fprDps1, frD);
@ -1490,21 +1421,19 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op
DefinePS1(fprBps1, frB); DefinePS1(fprBps1, frB);
DefinePS0(fprCps0, frC); DefinePS0(fprCps0, frC);
// todo - avoid temporaries when possible if (frB != frD)
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1);
}
else
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprAps0);
DefineTempFPR(fprTemp0, 0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprCps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0);
DefineTempFPR(fprTemp1, 1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprAps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1565,8 +1494,8 @@ bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0);
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1584,8 +1513,8 @@ bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0);
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1);
// adjust accuracy // adjust accuracy
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0);
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1);
return true; return true;
} }
@ -1674,12 +1603,18 @@ bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32
DefinePS0(frpDps0, frD); DefinePS0(frpDps0, frD);
DefinePS1(frpDps1, frD); DefinePS1(frpDps1, frD);
if (frD != frB)
{
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps0);
}
else
{
DefineTempFPR(frpTemp, 0); DefineTempFPR(frpTemp, 0);
// todo - optimize cases where a temporary is not necessary
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpTemp, frpBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpTemp, frpBps0);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpTemp); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpTemp);
}
return true; return true;
} }
@ -1702,20 +1637,8 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32
// Handler for the PS_CMPO0 opcode. Not implemented: no IML instruction is emitted,
// the opcode argument is never decoded, and the function always returns false.
// NOTE(review): presumably a false return tells the caller that this instruction
// cannot be recompiled - confirm against the call site.
bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{ {
printf("PS_CMPO0: Not implemented\n"); // Not implemented
return false; return false;
// Disabled draft implementation, kept only as a comment (never compiled).
// It decodes frA/frB/crfD from the opcode and would emit a single
// PPCREC_IML_OP_FPR_FCMPO_BOTTOM compare of the two loaded FPR registers.
/*
sint32 crfD, frA, frB;
uint32 c=0;
frB = (opcode>>11)&0x1F;
frA = (opcode>>16)&0x1F;
crfD = (opcode>>23)&0x7;
IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA);
IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD);
return true;
*/
} }
bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -1724,11 +1647,7 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o
frB = (opcode >> 11) & 0x1F; frB = (opcode >> 11) & 0x1F;
frA = (opcode >> 16) & 0x1F; frA = (opcode >> 16) & 0x1F;
crfD = (opcode >> 23) & 0x7; crfD = (opcode >> 23) & 0x7;
// DefinePS1(frpAps0, frA);
// DefinePS1(frpBps0, frB);
// ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPU_BOTTOM, frpAps0, frpBps0, crfD);
//crfD >>= 2;
DefinePS0(fprA, frA); DefinePS0(fprA, frA);
DefinePS0(fprB, frB); DefinePS0(fprB, frB);