From bb5a7ce4ffc49ada4548c01545ddbd4e14f44cf0 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 28 Apr 2025 12:58:53 +0200 Subject: [PATCH] PPCRec: Rework FP instructions to use scalar registers --- src/Cafe/HW/Espresso/EspressoISA.h | 12 + .../Recompiler/BackendX64/BackendX64.cpp | 13 + .../Recompiler/BackendX64/BackendX64.h | 2 + .../Recompiler/BackendX64/BackendX64FPU.cpp | 22 +- .../BackendX64/BackendX64GenFPU.cpp | 41 + .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 10 +- .../Recompiler/IML/IMLInstruction.cpp | 12 + .../Espresso/Recompiler/IML/IMLInstruction.h | 11 + .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 4 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 5 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 28 +- .../Recompiler/PPCRecompilerImlGen.cpp | 126 +- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 1684 +++++++++-------- 13 files changed, 1185 insertions(+), 785 deletions(-) diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h index e66e1424..5e09763b 100644 --- a/src/Cafe/HW/Espresso/EspressoISA.h +++ b/src/Cafe/HW/Espresso/EspressoISA.h @@ -10,6 +10,18 @@ namespace Espresso CR_BIT_INDEX_SO = 3, }; + enum class PSQ_LOAD_TYPE + { + TYPE_F32 = 0, + TYPE_UNUSED1 = 1, + TYPE_UNUSED2 = 2, + TYPE_UNUSED3 = 3, + TYPE_U8 = 4, + TYPE_U16 = 5, + TYPE_S8 = 6, + TYPE_S16 = 7, + }; + enum class PrimaryOpcode { // underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 6a8aac2b..09feb830 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -1208,6 +1208,13 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, { x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); } + else if (name >= PPCREC_NAME_FPR0_NEW && name < (PPCREC_NAME_FPR0_NEW + 64)) + { + sint32 regIndex = (name - PPCREC_NAME_FPR0_NEW) / 2; + sint32 pairIndex = (name - PPCREC_NAME_FPR0_NEW) % 2; + x64Gen_movddup_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double)); + // todo - use movsd here + } else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) { x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); @@ -1285,6 +1292,12 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, { x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); } + else if (name >= PPCREC_NAME_FPR0_NEW && name < (PPCREC_NAME_FPR0_NEW + 64)) + { + sint32 regIndex = (name - PPCREC_NAME_FPR0_NEW) / 2; + sint32 pairIndex = (name - PPCREC_NAME_FPR0_NEW) % 2; + x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0)); + } else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) { x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index e4d1f5a9..de415ca9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -205,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); +void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); @@ -230,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32); void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc); +void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc); void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 4d9a538d..2d98522e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -269,7 +269,8 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); uint8 mode = imlInstruction->op_storeLoad.mode; - if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ) + if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 || + mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0 ) // lazy hack for now. Load only one value for SINGLE_INTO_PS0 { // load byte swapped single into temporary FPR if( indexed ) @@ -682,6 +683,21 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg) // FPR op FPR void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ) + { + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR); + uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA); + x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr); + return; + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ) + { + uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR); + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA); + x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr); + return; + } + // all other cases operate on two floating-point registers uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA); @@ -1025,6 +1041,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, { x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE ) + { + x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1)); + } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ) { x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp index 882820e2..4bbcc025 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp @@ -213,6 +213,37 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi } } +void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) +{ + // SSE2 + if( memRegister == X86_REG_RSP ) + { + // MOVSD , [RSP+] + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x10); + x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8); + x64Gen_writeU8(x64GenContext, 0x24); + x64Gen_writeU32(x64GenContext, memImmU32); + } + else if( memRegister == 15 ) + { + // MOVSD , [R15+] + x64Gen_writeU8(x64GenContext, 0x36); + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, memRegister, xmmRegister, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x10); + x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8); + x64Gen_writeU32(x64GenContext, memImmU32); + } + else + { + assert_dbg(); + } +} + void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE3 @@ -561,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7)); } +void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc) +{ + // SSE2 + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x2A); + x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7)); +} + void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc) { // SSE2 diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 07fd4002..88db1419 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -189,10 +189,18 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di { strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); } - else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) + else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 32)) { strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); } + else if (inst.op_r_name.name >= PPCREC_NAME_FPR0_NEW && inst.op_r_name.name < (PPCREC_NAME_FPR0_NEW + 32*2)) + { + strOutput.addFmt("f{}", inst.op_r_name.name - ((PPCREC_NAME_FPR0_NEW - inst.op_r_name.name)/2)); + if ((inst.op_r_name.name-PPCREC_NAME_FPR0_NEW)&1) + strOutput.add(".ps1"); + else + strOutput.add(".ps0"); + } else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) { strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index cb481043..9d890d17 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -239,6 +239,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); registersUsed->readGPR2 = op_storeLoad.registerData; break; + case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0: case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: @@ -278,6 +279,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); registersUsed->readGPR3 = op_storeLoad.registerData; break; + case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0: case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: @@ -389,6 +391,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = op_fpr_r_r.regA; registersUsed->readGPR2 = op_fpr_r_r.regR; } + else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT || + operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT) + { + registersUsed->writtenGPR1 = op_fpr_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r.regA; + } else cemu_assert_unimplemented(); } @@ -446,6 +454,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = op_fpr_r.regR; registersUsed->writtenGPR1 = op_fpr_r.regR; } + else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE) + { + registersUsed->writtenGPR1 = op_fpr_r.regR; + } else cemu_assert_unimplemented(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index dd56f436..cca5c362 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -158,10 +158,14 @@ enum PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A + // Conversion (FPR_R_R) + PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr + PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr // PS PPCREC_IML_OP_FPR_SUM0, PPCREC_IML_OP_FPR_SUM1, + PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register // R_R_R only @@ -276,6 +280,7 @@ enum // IMLName PPCREC_NAME_R0 = 2000, PPCREC_NAME_SPR0 = 3000, PPCREC_NAME_FPR0 = 4000, + PPCREC_NAME_FPR0_NEW = 4800, // similar to FPR0, but counting PS0 and PS1 separate. So fp3.ps1 is at offset 3 * 2 + 1 PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 PPCREC_NAME_XER_CA = 6000, // carry bit from XER PPCREC_NAME_XER_OV = 6001, // overflow bit from XER @@ -757,6 +762,12 @@ struct IMLInstruction // load from memory void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) { + if (registerGQR.IsValid()) + { + if ( mode == 0) + __debugbreak(); + } + this->type = PPCREC_IML_TYPE_FPR_LOAD; this->operation = 0; this->op_storeLoad.registerData = registerDestination; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 2a0e2821..d4d13b1f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -209,7 +209,9 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 { // UGQR 2 to 7 are initialized by the OS and we assume that games won't ever permanently touch those // todo - hack - replace with more accurate solution - if (gqrIndex == 2) + if (gqrIndex == 0) + gqrValue = 0x00000000; + else if (gqrIndex == 2) gqrValue = 0x00040004; else if (gqrIndex == 3) gqrValue = 0x00050005; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index d411be14..5de1408b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -2093,7 +2093,10 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); if (imlSegment->HasSuffixInstruction()) { - cemu_assert_debug(!currentRange); // currentRange should be NULL? + if (currentRange) + { + cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress); + } for (auto& remainingRange : activeRanges) { cemu_assert_debug(!remainingRange->hasStore); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 6296f048..e4e9e776 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -53,14 +53,10 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate); +bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate); +bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1); +bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1); bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); @@ -88,3 +84,19 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); + +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken); +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count); + + +class IMLRedirectInstOutput +{ +public: + IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment); + ~IMLRedirectInstOutput(); + + +private: + ppcImlGenContext_t* m_context; + IMLSegment* m_prevSegment; +}; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 2021a8ce..c75dae62 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -87,8 +87,7 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex } // create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards -template -void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken) +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken) { IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); @@ -118,6 +117,100 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P basicBlockInfo.appendSegment = segMerge; } +IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index); + +IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext) +{ + m_prevSegment = ppcImlGenContext->currentOutputSegment; + cemu_assert_debug(ppcImlGenContext->currentOutputSegment == ppcImlGenContext->currentBasicBlock->appendSegment); + if (outputSegment == ppcImlGenContext->currentOutputSegment) + { + m_prevSegment = nullptr; + return; + } + m_context->currentBasicBlock->appendSegment = outputSegment; + m_context->currentOutputSegment = outputSegment; +} + +IMLRedirectInstOutput::~IMLRedirectInstOutput() +{ + if (m_prevSegment) + { + m_context->currentBasicBlock->appendSegment = m_prevSegment; + m_context->currentOutputSegment = m_prevSegment; + } +} + + +// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count) +{ + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction + + const sint32 numBranchSegments = count;// - 1; If we move the default case to the first segment we could avoid one extra non-conditional branch + const sint32 numCaseSegments = count; + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1); + IMLSegment** extraBranchSegments = segments.data(); + IMLSegment** caseSegments = segments.data() + numBranchSegments - 1; + IMLSegment* mergeSegment = segments[numBranchSegments - 1 + numCaseSegments]; + + // move links to the merge segment + mergeSegment->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + mergeSegment->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(nullptr); + currentWriteSegment->SetLinkBranchNotTaken(nullptr); + + for (sint32 i=0; iSetLinkBranchTaken(caseSegments[i]); + seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1)); + seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ); + seg->AppendInstruction()->make_conditional_jump(tmpBoolReg, true); + } + else + { + seg->SetLinkBranchTaken(caseSegments[i]); + seg->AppendInstruction()->make_jump(); + } + } + // link case segments + for (sint32 i=0; iSetLinkBranchTaken(mergeSegment); + //seg->AppendInstruction()->make_jump(); -> Jumps are added after the instructions + } + else + { + // todo - the last segment doesnt need to jump + seg->SetLinkBranchTaken(mergeSegment); + //seg->AppendInstruction()->make_jump(); + } + } + ppcImlGenContext.currentOutputSegment = mergeSegment; + basicBlockInfo.appendSegment = mergeSegment; +} + IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat) { auto it = ppcImlGenContext->mappedRegs.find(mappedName); @@ -212,8 +305,8 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index) return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); } -// get throw-away register. Only valid for the scope of a single translated instruction -// be careful to not collide with manually loaded temporary register +// get throw-away register +// be careful to not collide with other temporary register IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { cemu_assert_debug(index < 4); @@ -1891,23 +1984,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 12: // multiply scalar - if (PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext, opcode) == false) + case 12: // PS_MULS0 + if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 13: // multiply scalar - if (PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext, opcode) == false) + case 13: // PS_MULS1 + if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 14: // multiply add scalar - if (PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext, opcode) == false) + case 14: // PS_MADDS0 + if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 15: // multiply add scalar - if (PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext, opcode) == false) + case 15: // PS_MADDS1 + if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2515,12 +2608,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) ppcImlGenContext->hasFPUInstruction = true; break; case 56: - if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 57: - if (PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2573,12 +2666,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) } break; case 60: - if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 61: - if (PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2688,7 +2781,6 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) } // returns false if code flow is not interrupted -// continueDefaultPath: Controls if bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget) { hasBranchTarget = false; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index 422df1dd..eafd6957 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -3,24 +3,42 @@ #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" +#include "IML/IML.h" ATTR_MS_ABI double frsqrte_espresso(double input); ATTR_MS_ABI double fres_espresso(double input); IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); +#define DefinePS0(name, regIndex) IMLReg name = _GetFPRRegPS0(ppcImlGenContext, regIndex); +#define DefinePS1(name, regIndex) IMLReg name = _GetFPRRegPS1(ppcImlGenContext, regIndex); -// todo -IMLReg _GetRegFPRGekko(ppcImlGenContext_t* ppcImlGenContext, uint32 index) +#define DefinePSX(name, regIndex, isPS1) IMLReg name = isPS1 ? _GetFPRRegPS1(ppcImlGenContext, regIndex) : _GetFPRRegPS0(ppcImlGenContext, regIndex); + +#define DefineTempFPR(name, index) IMLReg name = _GetFPRTemp(ppcImlGenContext, index); + +IMLReg _GetFPRRegPS0(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { - cemu_assert_debug(index < 32); - return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + index); + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR0_NEW + regIndex * 2 + 0, IMLRegFormat::F64); } -// deprecated way of getting a float register (always in paired mode) -IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +IMLReg _GetFPRRegPS1(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { - return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR0_NEW + regIndex * 2 + 1, IMLRegFormat::F64); +} + +IMLReg _GetFPRTemp(ppcImlGenContext_t* ppcImlGenContext, uint32 index) +{ + cemu_assert_debug(index < 4); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + index, IMLRegFormat::F64); +} + +IMLReg _GetFPRReg(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex, bool selectPS1) +{ + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR0_NEW + regIndex * 2 + (selectPS1 ? 1 : 0), IMLRegFormat::F64); } /* @@ -38,6 +56,7 @@ void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext */ void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) { + cemu_assert_suspicious(); // should not be used any longer ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, fprRegister); if( flushDenormals ) assert_dbg(); @@ -48,7 +67,6 @@ bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, sint32 rA, frD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if (withUpdate) { @@ -57,23 +75,20 @@ bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); imm = 0; // set imm to 0 so we dont add it twice } - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fpPs0, frD); if (isDouble) { // LFD/LFDU - ppcImlGenContext->emitInst().make_fpr_r_memory(fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); } else { // LFS/LFSU + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); if( ppcImlGenContext->LSQE ) { - ppcImlGenContext->emitInst().make_fpr_r_memory(fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); - } - else - { - ppcImlGenContext->emitInst().make_fpr_r_memory(fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + DefinePS1(fpPs1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fpPs1, fpPs0); } } return true; @@ -92,33 +107,25 @@ bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenCont IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); if (withUpdate) - { ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); - } - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fpPs0, frD); if (isDouble) { if (withUpdate) - ppcImlGenContext->emitInst().make_fpr_r_memory(fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); else - ppcImlGenContext->emitInst().make_fpr_r_memory_indexed(fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); + ppcImlGenContext->emitInst().make_fpr_r_memory_indexed(fpPs0, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); } else { + if (withUpdate) + ppcImlGenContext->emitInst().make_fpr_r_memory( fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + else + ppcImlGenContext->emitInst().make_fpr_r_memory_indexed( fpPs0, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); if( ppcImlGenContext->LSQE ) { - if (withUpdate) - ppcImlGenContext->emitInst().make_fpr_r_memory( fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); - else - ppcImlGenContext->emitInst().make_fpr_r_memory_indexed( fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); - } - else - { - if (withUpdate) - ppcImlGenContext->emitInst().make_fpr_r_memory( fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); - else - ppcImlGenContext->emitInst().make_fpr_r_memory_indexed( fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + DefinePS1(fpPs1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fpPs1, fpPs0); } } return true; @@ -130,16 +137,16 @@ bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenCont uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fpPs0, frD); if (withUpdate) { ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); imm = 0; } if (isDouble) - ppcImlGenContext->emitInst().make_fpr_memory_r(fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); else - ppcImlGenContext->emitInst().make_fpr_memory_r(fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); return true; } @@ -159,22 +166,21 @@ bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGen { ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); } - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); + DefinePS0(fpPs0, frS); auto mode = isDouble ? PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0 : PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0; if( ppcImlGenContext->LSQE ) { if (hasUpdate) - ppcImlGenContext->emitInst().make_fpr_memory_r(fprRegister, gprRegister1, 0, mode, true); + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); else - ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fprRegister, gprRegister1, gprRegister2, 0, mode, true); + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } else { if (hasUpdate) - ppcImlGenContext->emitInst().make_fpr_memory_r(fprRegister, gprRegister1, 0, mode, true); + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); else - ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fprRegister, gprRegister1, gprRegister2, 0, mode, true); + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } return true; } @@ -183,7 +189,7 @@ bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); - // get memory gpr registers + DefinePS0(fpPs0, frS); IMLReg gprRegister1; IMLReg gprRegister2; if( rA != 0 ) @@ -197,12 +203,10 @@ bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); gprRegister2 = IMLREG_INVALID; } - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( rA != 0 ) - ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); else - ppcImlGenContext->emitInst().make_fpr_memory_r(fprRegister, gprRegister1, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); return true; } @@ -211,13 +215,10 @@ bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprD, fprA, fprB); return true; } @@ -226,12 +227,10 @@ bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // subtract bottom double of frB from bottom double of frD - ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprD, fprA, fprB); return true; } @@ -246,15 +245,14 @@ bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod frA = frC; frC = temp; } - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); return true; } @@ -263,27 +261,25 @@ bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp, fprB); // move result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprTemp); return true; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); // copy ps0 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // copy ps0 // divide bottom double of frD by bottom double of frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprD, fprB); return true; } @@ -291,38 +287,37 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp, fprC); // add result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - IMLReg temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // always copy ps0 and ps1 // multiply bottom double of frD with bottom double of frC - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprD, fprB); return true; } @@ -330,32 +325,34 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // if frB is already in frD we need a temporary register to store the product of frA*frC + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); if( frB == frD ) { - // not implemented + // if frB is already in frD we need a temporary register to store the product of frA*frC + DefineTempFPR(fprTemp, 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp, fprB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprTemp); return false; } - // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - IMLReg temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } - // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + // move frA to frD + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frC - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); // sub frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprD, fprB); return true; } @@ -363,51 +360,52 @@ bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { - // hCPU->fpr[frD].fpr = -(hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr - hCPU->fpr[frD].fpr); - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - //// negate frB/frD - //ppcImlGenContext->emitInst().make_fpr_r(NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD, true); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp, fprC); // sub frB from temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp, fprB); // negate result - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprTemp); // move result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - IMLReg temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // multiply bottom double of frD with bottom double of frC - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); // sub frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprD, fprB); // negate result - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprD); return true; } +#define PSE_CopyResultToPs1() if( ppcImlGenContext->PSE ) \ + { \ + DefinePS1(fprDPS1, frD); \ + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDPS1, fprD); \ + } + bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB_unused, frC; @@ -420,24 +418,18 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frA = frC; frC = temp; } - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 - + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } - + PSE_CopyResultToPs1(); return true; } @@ -446,44 +438,31 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); - /*hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr / hCPU->fpr[frB].fpr); - if( hCPU->PSE ) - hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;*/ - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp, fprB); // move result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprTemp); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // subtract bottom double of frB from bottom double of frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -499,22 +478,17 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frA = frB; frB = temp; } - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // add bottom double of frD and bottom double of frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -523,20 +497,12 @@ bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frB==0); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // subtract bottom - ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); - // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprD, fprA, fprB); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -548,30 +514,26 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr + hCPU->fpr[frB].fpr; //if( hCPU->PSE ) // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - IMLReg fprRegisterTemp; + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + IMLReg fprRegisterTemp; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterTemp, fprRegisterB); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterTemp, fprB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) + if( fprD != fprRegisterTemp ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprRegisterTemp); } + PSE_CopyResultToPs1(); return true; } @@ -582,30 +544,27 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); //if( hCPU->PSE ) // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); + IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) + if( fprD != fprRegisterTemp ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprRegisterTemp); } + PSE_CopyResultToPs1(); return true; } @@ -613,37 +572,27 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - - //[FP1(RD) = ]FP0(RD) = -(FP0(RA) * FP0(RC) - FP0(RB)); - //hCPU->fpr[frD].fp0 = (float)-(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //if( PPC_PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprB); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterTemp); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) + if( fprD != fprRegisterTemp ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprRegisterTemp); } + PSE_CopyResultToPs1(); return true; } @@ -684,18 +633,18 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 crfD, frA, frB; PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); crfD >>= 2; - IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); // todo: set fpscr @@ -706,9 +655,9 @@ bool PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 frD, rA, frB; PPC_OPC_TEMPL_X(opcode, frD, rA, frB); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprB); return true; } @@ -717,14 +666,11 @@ bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // abs frD - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS_BOTTOM, fprD); return true; } @@ -733,14 +679,11 @@ bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // abs frD - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, fprD); return true; } @@ -749,15 +692,12 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // copy result to top - if( ppcImlGenContext->PSE ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -766,15 +706,14 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( fprRegisterD != fprRegisterB ) + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( fprD != fprB ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprB); } - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegisterD); - if( ppcImlGenContext->PSE ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprD); + PSE_CopyResultToPs1(); return true; } @@ -787,14 +726,13 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { return false; } - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprB); // negate frD - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprD); return true; } @@ -806,11 +744,11 @@ bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { return false; } - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT_BOTTOM, fprD, fprA, fprB, fprC); return true; } @@ -818,12 +756,11 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); return true; } @@ -831,237 +768,369 @@ bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, fprD, fprB); return true; } -bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue); + +void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg, sint32 clampMin, sint32 clampMax) +{ + IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + // min(reg, clampMax) + ppcImlGenContext->emitInst().make_compare_s32(reg, clampMax, regTmpCondBool, IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax); + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken */ + } + ); + // max(reg, clampMin) + ppcImlGenContext->emitInst().make_compare_s32(reg, clampMin, regTmpCondBool, IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMin); + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken */ + } + ); +} + +void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) +{ + if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) + { + ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS0, gprA, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + if(readPS1) + { + ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS1, gprA, imm + 4, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + } + } + if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) + { + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 16, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + if(readPS1) + { + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 2, 16, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + } + } + else if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) + { + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 8, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + if(readPS1) + { + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 1, 8, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + } + } +} + +// PSQ_L and PSQ_LU +bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) { int rA, frD; uint32 immUnused; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - sint32 gqrIndex = ((opcode >> 12) & 7); uint32 imm = opcode & 0xFFF; if (imm & 0x800) imm |= ~0xFFF; - bool readPS1 = (opcode & 0x8000) == false; - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // psq load - ppcImlGenContext->emitInst().make_fpr_r_memory(fprRegister, gprRegister, imm, readPS1 ? PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; -} + IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fprDPS0, frD); + DefinePS1(fprDPS1, frD); + if (!readPS1) + { + // if PS1 is not explicitly read then set it to 1.0 + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_LOAD_ONE, fprDPS1); + } + if (withUpdate) + { + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); + imm = 0; + } + uint32 knownGQRValue = 0; + if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, knownGQRValue) ) + { + // generate complex dynamic handler when we dont know the GQR value ahead of time + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // extract the load type from the GQR register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); + IMLSegment* caseSegment[5]; + sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); + for (sint32 i=0; i<5; i++) + { + IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, static_cast(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1); + // create the case jump instructions here because we need to add it last + caseSegment[i]->AppendInstruction()->make_jump(); + } + return true; + } -bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, frD; - uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - if (rA == 0) + Espresso::PSQ_LOAD_TYPE type = static_cast((knownGQRValue >> 0) & 0x7); + sint32 scale = (knownGQRValue >> 8) & 0x3F; + cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) + if (scale != 0) return false; - sint32 gqrIndex = ((opcode >> 12) & 7); - uint32 imm = opcode & 0xFFF; - if (imm & 0x800) - imm |= ~0xFFF; - - bool readPS1 = (opcode & 0x8000) == false; - - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired load - ppcImlGenContext->emitInst().make_fpr_r_memory(fprRegister, gprRegister, 0, readPS1 ? PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; -} - -bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, frD; - uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - uint32 imm = opcode & 0xFFF; - if (imm & 0x800) - imm |= ~0xFFF; - sint32 gqrIndex = ((opcode >> 12) & 7); - - bool storePS1 = (opcode & 0x8000) == false; - - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired store - ppcImlGenContext->emitInst().make_fpr_memory_r(fprRegister, gprRegister, imm, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; -} - -bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, frD; - uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - if (rA == 0) + if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) + { return false; + } + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, type, readPS1, gprA, imm, fprDPS0, fprDPS1); + return true; +} + +void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) +{ + cemu_assert_debug(!storePS1 || fprDPS1.IsValid()); + if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) + { + ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS0, gprA, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS1, gprA, imm + 4, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); + } + } + else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) + { + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); + // todo - scaling + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 16, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); + // todo - scaling + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 2, 16, true); + } + } + else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) + { + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 8, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); + // todo - scaling + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 1, 8, true); + } + } +} + +// PSQ_ST and PSQ_STU +bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) +{ + int rA, frD; + uint32 immUnused; + PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); uint32 imm = opcode & 0xFFF; if (imm & 0x800) imm |= ~0xFFF; sint32 gqrIndex = ((opcode >> 12) & 7); - bool storePS1 = (opcode & 0x8000) == false; - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); + IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fprDPS0, frD); + IMLReg fprDPS1 = storePS1 ? _GetFPRRegPS1(ppcImlGenContext, frD) : IMLREG_INVALID; - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired store - ppcImlGenContext->emitInst().make_fpr_memory_r(fprRegister, gprRegister, 0, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); + if (withUpdate) + { + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); + imm = 0; + } + + uint32 gqrValue = 0; + if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue) ) + { + // generate complex dynamic handler when we dont know the GQR value ahead of time + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // extract the load type from the GQR register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); + + IMLSegment* caseSegment[5]; + sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); + for (sint32 i=0; i<5; i++) + { + IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, static_cast(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1); + ppcImlGenContext->emitInst().make_jump(); // finalize case + } + return true; + } + + Espresso::PSQ_LOAD_TYPE type = static_cast((gqrValue >> 16) & 0x7); + sint32 scale = (gqrValue >> 24) & 0x3F; + cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) + + if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) + { + return false; + } + + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, type, storePS1, gprA, imm, fprDPS0, fprDPS1); return true; } -bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MULS0 and PS_MULS1 +bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frC; frC = (opcode>>6)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA we can multiply frD immediately and safe a copy instruction - if( frD == frA ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - } - else - { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePSX(fprC, frC, isVariant1); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + DefineTempFPR(fprTmp0, 0); + DefineTempFPR(fprTmp1, 1); + + // todo - optimize cases where a temporary is not necessary + // todo - round fprC to 25bit accuracy + + // copy ps0 and ps1 to temporary + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTmp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTmp1, fprAps1); + + // multiply + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTmp0, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTmp1, fprC); + + // copy back to result + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTmp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTmp1); + + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + return true; } -bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frC; - frC = (opcode>>6)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA we can multiply frD immediately and safe a copy instruction - if( frD == frA ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - } - else - { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} - -bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MADDS0 and PS_MADDS1 +bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); - //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} -bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frB, frC; - frC = (opcode>>6)&0x1F; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp0); - //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp1 in bottom and top half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePSX(fprC, frC, isVariant1); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + DefineTempFPR(fprTmp0, 0); + DefineTempFPR(fprTmp1, 1); + + // todo - round C to 25bit + // todo - optimize cases where a temporary is not necessary + + // copy ps0 and ps1 to temporary + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTmp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTmp1, fprAps1); + + // multiply + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTmp0, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTmp1, fprC); + + // add + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprTmp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprTmp1, fprBps1); + + // copy back to result + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTmp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTmp1); + + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); + + // //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); + // //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp1); + // // load registers + // // IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + // // IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + // // IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); + // // IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + // // we need a temporary register to store frC.fp0 in low and high half + // IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); + // // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction + // if( frD == frA && frD != frB ) + // { + // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); + // // add frB + // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + // } + // else + // { + // // we multiply temporary by frA + // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); + // // add frB + // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); + // // copy result to frD + // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + // } + // // adjust accuracy + // PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); return true; } @@ -1073,25 +1142,34 @@ bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 + hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + if( frD == frA ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps1, fprBps1); } else if( frD == frB ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps1, fprAps1); } else { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterA); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps1, fprBps1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1103,13 +1181,20 @@ bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 - hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 - hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterA, fprRegisterB); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprDps0, fprAps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprDps1, fprAps1, fprBps1); + // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1119,28 +1204,37 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frC = (opcode >> 6) & 0x1F; frA = (opcode >> 16) & 0x1F; frD = (opcode >> 21) & 0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // we need a temporary register - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA we can multiply frD immediately and safe a copy instruction + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + // todo: Optimize for when a temporary isnt necessary + // todo: Round to 25bit? + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp1, fprCps1); if (frD == frA) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps1, fprTemp1); } else { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1152,28 +1246,35 @@ bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode >> 21) & 0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 / hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 / hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA we can divide frD immediately and safe a copy instruction + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + if (frD == frA) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprDps1, fprBps1); } else { - // we need a temporary register - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterA); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp1, fprAps1); // we divide temporary by frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_PAIR, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp1, fprBps1); // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1187,33 +1288,44 @@ bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 op //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp1, fprCps1); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction if( frD == frA && frD != frB ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps1, fprBps1); } else { // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp1, fprAps1); // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprTemp1, fprBps1); // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1225,33 +1337,45 @@ bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp1, fprCps1); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction + // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction if( frD == frA && frD != frB ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps1, fprBps1); } else { // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp1, fprAps1); // add frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprTemp1, fprBps1); // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTemp1); } + // negate - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprDps1); // adjust accuracy //PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); // Splatoon requires that we emulate flush-to-denormals for this instruction @@ -1269,33 +1393,41 @@ bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 op //hCPU->fpr[frD].fp0 = (hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); //hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 - hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp1, fprCps1); + // todo: This instruction can be optimized so that it doesn't always use a temporary register if( frD == frA && frD != frB ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // sub frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprDps1, fprBps1); } else { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // sub frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp1, fprBps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTemp1); } + // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1307,35 +1439,43 @@ bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp1, fprCps1); + // todo: This instruction can be optimized so that it doesn't always use a temporary register if( frD == frA && frD != frB ) { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // sub frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprDps1, fprBps1); } else { - // we multiply temporary by frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // sub frB - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp1, fprBps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTemp1); } // negate result - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprDps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1350,14 +1490,27 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op //float s1 = (float)hCPU->fpr[frC].fp1; //hCPU->fpr[frD].fp0 = s0; //hCPU->fpr[frD].fp1 = s1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SUM0, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprBps1, frB); + DefinePS1(fprCps1, frC); + + if( frD == frA ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps0, fprBps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprDps0, fprBps1); + } + if (fprDps1 != fprCps1) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprCps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1372,14 +1525,28 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); //hCPU->fpr[frD].fp0 = s0; //hCPU->fpr[frD].fp1 = s1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SUM1, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + + // todo - avoid temporaries when possible + + DefineTempFPR(fprTemp0, 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp0, fprCps0); + + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp1, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprTemp1, fprBps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprTemp1); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprTemp0); + // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1388,12 +1555,20 @@ bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = -hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp1 = -hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterB); + + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + if (frB != frD) + { + // copy + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprBps1); + } + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprDps1); return true; } @@ -1402,10 +1577,17 @@ bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ABS_PAIR, fprRegisterD, fprRegisterB); + + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprBps1); + + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS_BOTTOM, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS_BOTTOM, fprDps1); return true; } @@ -1417,11 +1599,16 @@ bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = (float)(1.0f / (float)hCPU->fpr[frB].fp0); //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FRES_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); + // adjust accuracy + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1430,13 +1617,17 @@ bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = (float)(1.0f / (float)sqrt(hCPU->fpr[frB].fp0)); - //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)sqrt(hCPU->fpr[frB].fp1)); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FRSQRTE_PAIR, fprRegisterD, fprRegisterB); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); + // adjust accuracy + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1445,14 +1636,15 @@ bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp1 = hCPU->fpr[frB].fp1; - // load registers if( frB != frD ) { - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterB); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprDps1, fprBps1); } return true; } @@ -1465,11 +1657,17 @@ bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT_PAIR, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT_BOTTOM, fprDps0, fprAps0, fprBps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT_BOTTOM, fprDps1, fprAps1, fprBps1, fprCps1); return true; } @@ -1479,26 +1677,13 @@ bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)hCPU->fpr[frA].fp0; - //float s1 = (float)hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - if( frA == frB ) - { - // simply duplicate bottom into bottom and top of destination register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - } - else - { - // copy bottom of frB to top first - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, fprRegisterD, fprRegisterB); - // copy bottom of frA - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); - } + DefinePS0(frpAps0, frA); + DefinePS0(frpBps0, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps1, frpBps0); + if (frpDps0 != frpAps0) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps0, frpAps0); return true; } @@ -1508,17 +1693,14 @@ bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0; - // hCPU->fpr[frD].fp1 = hCPU->fpr[frB].fp1; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - if( fprRegisterD != fprRegisterB ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterB); - if( fprRegisterD != fprRegisterA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); + DefinePS0(frpAps0, frA); + DefinePS1(frpBps1, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + if (frpDps0 != frpAps0) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps0, frpAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps1, frpBps1); return true; } @@ -1529,34 +1711,17 @@ bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( frA == frB ) - { - // swap bottom and top - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterA); - } - else if( frA == frD ) - { - // copy frB bottom to frD bottom - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // swap lower and upper half of frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); - } - else if( frB == frD ) - { - // copy upper half of frA to upper half of frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterA); - // swap lower and upper half of frD - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); - } - else - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); - } + DefinePS1(frpAps1, frA); + DefinePS0(frpBps0, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + + DefineTempFPR(frpTemp, 0); + + // todo - optimize cases where a temporary is not necessary + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpTemp, frpBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps1, frpTemp); return true; } @@ -1567,27 +1732,13 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( fprRegisterA == fprRegisterB ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - } - else if( fprRegisterD != fprRegisterB ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterB); - } - else if( fprRegisterD == fprRegisterB ) - { - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, fprRegisterD, fprRegisterA); - } - else - { - debugBreakpoint(); - return false; - } + DefinePS1(frpAps1, frA); + DefinePS1(frpBps1, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, frpDps1, frpBps1); return true; } @@ -1595,7 +1746,7 @@ bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 o { printf("PS_CMPO0: Not implemented\n"); return false; - +/* sint32 crfD, frA, frB; uint32 c=0; frB = (opcode>>11)&0x1F; @@ -1606,34 +1757,55 @@ bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 o IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; +*/ } bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("PS_CMPU0: Not implemented\n"); - return false; - sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); + // DefinePS1(frpAps0, frA); + // DefinePS1(frpBps0, frB); + // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPU_BOTTOM, frpAps0, frpBps0, crfD); + + //crfD >>= 2; + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo: set fpscr return true; } bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("PS_CMPU1: Not implemented\n"); - return false; - sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPU_TOP, fprRegisterA, fprRegisterB, crfD); + + DefinePS1(fprA, frA); + DefinePS1(fprB, frB); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); return true; } \ No newline at end of file