#include "Cafe/HW/Espresso/EspressoISA.h" #include "../Interpreter/PPCInterpreterInternal.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" #include "IML/IML.h" ATTR_MS_ABI double frsqrte_espresso(double input); ATTR_MS_ABI double fres_espresso(double input); IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); #define DefinePS0(name, regIndex) IMLReg name = _GetFPRRegPS0(ppcImlGenContext, regIndex); #define DefinePS1(name, regIndex) IMLReg name = _GetFPRRegPS1(ppcImlGenContext, regIndex); #define DefinePSX(name, regIndex, isPS1) IMLReg name = isPS1 ? _GetFPRRegPS1(ppcImlGenContext, regIndex) : _GetFPRRegPS0(ppcImlGenContext, regIndex); #define DefineTempFPR(name, index) IMLReg name = _GetFPRTemp(ppcImlGenContext, index); IMLReg _GetFPRRegPS0(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { cemu_assert_debug(regIndex < 32); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR0_NEW + regIndex * 2 + 0, IMLRegFormat::F64); } IMLReg _GetFPRRegPS1(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { cemu_assert_debug(regIndex < 32); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR0_NEW + regIndex * 2 + 1, IMLRegFormat::F64); } IMLReg _GetFPRTemp(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { cemu_assert_debug(index < 4); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + index, IMLRegFormat::F64); } IMLReg _GetFPRReg(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex, bool selectPS1) { cemu_assert_debug(regIndex < 32); return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR0_NEW + regIndex * 2 + (selectPS1 ? 1 : 0), IMLRegFormat::F64); } /* * Rounds the bottom double to single precision (if single precision accuracy is emulated) */ void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) { ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); if( flushDenormals ) assert_dbg(); } bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { sint32 rA, frD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); if (withUpdate) { // add imm to memory register cemu_assert_debug(rA != 0); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); imm = 0; // set imm to 0 so we dont add it twice } DefinePS0(fpPs0, frD); if (isDouble) { // LFD/LFDU ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); } else { // LFS/LFSU ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); if( ppcImlGenContext->LSQE ) { DefinePS1(fpPs1, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fpPs1, fpPs0); } } return true; } bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { sint32 rA, frD, rB; PPC_OPC_TEMPL_X(opcode, frD, rA, rB); if( rA == 0 ) { debugBreakpoint(); return false; } // get memory gpr registers IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); if (withUpdate) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); DefinePS0(fpPs0, frD); if (isDouble) { if (withUpdate) ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); else ppcImlGenContext->emitInst().make_fpr_r_memory_indexed(fpPs0, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); } else { if (withUpdate) ppcImlGenContext->emitInst().make_fpr_r_memory( fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); else ppcImlGenContext->emitInst().make_fpr_r_memory_indexed( fpPs0, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); if( ppcImlGenContext->LSQE ) { DefinePS1(fpPs1, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fpPs1, fpPs0); } } return true; } bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { sint32 rA, frD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); DefinePS0(fpPs0, frD); if (withUpdate) { ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); imm = 0; } if (isDouble) ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); else ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); return true; } bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble) { sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); if( rA == 0 ) { debugBreakpoint(); return false; } // get memory gpr registers IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); if (hasUpdate) { ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); } DefinePS0(fpPs0, frS); auto mode = isDouble ? PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0 : PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0; if( ppcImlGenContext->LSQE ) { if (hasUpdate) ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); else ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } else { if (hasUpdate) ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); else ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } return true; } bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); DefinePS0(fpPs0, frS); IMLReg gprRegister1; IMLReg gprRegister2; if( rA != 0 ) { gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); } else { // rA is not used gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); gprRegister2 = IMLREG_INVALID; } if( rA != 0 ) ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); else ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); return true; } bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprA, fprB); return true; } bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB); return true; } bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB_unused, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB_unused, frC); if( frD == frC ) { // swap frA and frB sint32 temp = frA; frA = frC; frC = temp; } DefinePS0(fprA, frA); DefinePS0(fprC, frC); DefinePS0(fprD, frD); // move frA to frD (if different register) if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); return true; } bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { DefineTempFPR(fprTemp, 0); // move frA to temporary register ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp, fprB); // move result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return true; } // move frA to frD (if different register) if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // copy ps0 // divide bottom double of frD by bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); return true; } bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); DefinePS0(fprD, frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { DefineTempFPR(fprTemp, 0); // move frA to temporary register ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); // add result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD if( frD == frC ) { // swap frA and frC IMLReg temp = fprA; fprA = fprC; fprC = temp; } // move frA to frD (if different register) if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // always copy ps0 and ps1 // multiply bottom double of frD with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // add frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); return true; } bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); DefinePS0(fprD, frD); if( frB == frD ) { // if frB is already in frD we need a temporary register to store the product of frA*frC DefineTempFPR(fprTemp, 0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp, fprB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return false; } if( frD == frC ) { // swap frA and frC IMLReg temp = fprA; fprA = fprC; fprC = temp; } // move frA to frD if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // sub frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprB); return true; } bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); DefinePS0(fprD, frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { DefineTempFPR(fprTemp, 0); // move frA to temporary register ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); // sub frB from temporary register ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp, fprB); // negate result ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprTemp); // move result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD if( frD == frC ) { // swap frA and frC IMLReg temp = fprA; fprA = fprC; fprC = temp; } // move frA to frD (if different register) if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // sub frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprB); // negate result ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); return true; } #define PSE_CopyResultToPs1() if( ppcImlGenContext->PSE ) \ { \ DefinePS1(fprDPS1, frD); \ ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDPS1, fprD); \ } bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB_unused, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB_unused, frC); if( frD == frC ) { // swap frA and frC sint32 temp = frA; frA = frC; frC = temp; } DefinePS0(fprA, frA); DefinePS0(fprC, frC); DefinePS0(fprD, frD); // move frA to frD (if different register) if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); // if paired single mode, copy frD ps0 to ps1 PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { DefineTempFPR(fprTemp, 0); // move frA to temporary register ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp, fprB); // move result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } // move frA to frD (if different register) if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // subtract bottom double of frB from bottom double of frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); if( frD == frB ) { // swap frA and frB sint32 temp = frA; frA = frB; frB = temp; } DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprD, frD); // move frA to frD (if different register) if( frD != frA ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // add bottom double of frD and bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frB==0); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); //FPRD(RD) = FPRD(RA) * FPRD(RC) + FPRD(RB); //hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr + hCPU->fpr[frB].fpr; //if( hCPU->PSE ) // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); DefinePS0(fprD, frD); // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register IMLReg fprRegisterTemp; if( frD != frA && frD != frB && frD != frC ) fprRegisterTemp = fprD; else fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprRegisterTemp, fprB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result if( fprD != fprRegisterTemp ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); } PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); //hCPU->fpr[frD].fp0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); //if( hCPU->PSE ) // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); DefinePS0(fprD, frD); IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register if( frD != frA && frD != frB && frD != frC ) fprRegisterTemp = fprD; else fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result if( fprD != fprRegisterTemp ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); } PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); DefinePS0(fprD, frD); IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register if( frD != frA && frD != frB && frD != frC ) fprRegisterTemp = fprD; else fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprRegisterTemp); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result if( fprD != fprRegisterTemp ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); } PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { printf("FCMPO: Not implemented\n"); return false; //sint32 crfD, frA, frB; //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); //crfD >>= 2; //IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); //IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); //IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); //IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); //IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); //IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); // todo - set fpscr //sint32 crfD, frA, frB; //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); //crfD >>= 2; //uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); //uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); //ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; } bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 crfD, frA, frB; PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); crfD >>= 2; DefinePS0(fprA, frA); DefinePS0(fprB, frB); IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); // todo: set fpscr return true; } bool PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, rA, frB; PPC_OPC_TEMPL_X(opcode, frD, rA, frB); DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); return true; } bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); DefinePS0(fprB, frB); DefinePS0(fprD, frD); if( frD != frB ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprD); return true; } bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); DefinePS0(fprB, frB); DefinePS0(fprD, frD); if( frD != frB ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATIVE_ABS, fprD); return true; } bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); DefinePS0(fprB, frB); DefinePS0(fprD, frD); if( fprD != fprB ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); } ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprD); PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); if( opcode&PPC_OPC_RC ) { return false; } DefinePS0(fprB, frB); DefinePS0(fprD, frD); // move frB to frD (if different register) if( frD != frB ) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); // negate frD ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); return true; } bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); if( opcode&PPC_OPC_RC ) { return false; } DefinePS0(fprA, frA); DefinePS0(fprB, frB); DefinePS0(fprC, frC); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprD, fprA, fprB, fprC); return true; } bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprD); return true; } bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); DefinePS0(fprB, frB); DefinePS0(fprD, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCTIWZ, fprD, fprB); return true; } bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue); void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg, sint32 clampMin, sint32 clampMax) { IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); // min(reg, clampMax) ppcImlGenContext->emitInst().make_compare_s32(reg, clampMax, regTmpCondBool, IMLCondition::SIGNED_GT); ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, [&](ppcImlGenContext_t& genCtx) { /* branch taken */ genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax); }, [&](ppcImlGenContext_t& genCtx) { /* branch not taken */ } ); // max(reg, clampMin) ppcImlGenContext->emitInst().make_compare_s32(reg, clampMin, regTmpCondBool, IMLCondition::SIGNED_LT); ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, [&](ppcImlGenContext_t& genCtx) { /* branch taken */ genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMin); }, [&](ppcImlGenContext_t& genCtx) { /* branch not taken */ } ); } void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) { if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) { ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS0, gprA, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); if(readPS1) { ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS1, gprA, imm + 4, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); } } if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) { bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 16, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); if(readPS1) { ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 2, 16, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); } } else if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) { bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 8, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); if(readPS1) { ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 1, 8, isSigned, true); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); } } } // PSQ_L and PSQ_LU bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) { int rA, frD; uint32 immUnused; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); sint32 gqrIndex = ((opcode >> 12) & 7); uint32 imm = opcode & 0xFFF; if (imm & 0x800) imm |= ~0xFFF; bool readPS1 = (opcode & 0x8000) == false; IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); DefinePS0(fprDPS0, frD); DefinePS1(fprDPS1, frD); if (!readPS1) { // if PS1 is not explicitly read then set it to 1.0 ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_LOAD_ONE, fprDPS1); } if (withUpdate) { ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); imm = 0; } uint32 knownGQRValue = 0; if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, knownGQRValue) ) { // generate complex dynamic handler when we dont know the GQR value ahead of time IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); // extract the load type from the GQR register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); IMLSegment* caseSegment[5]; sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); for (sint32 i=0; i<5; i++) { IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, static_cast(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1); // create the case jump instructions here because we need to add it last caseSegment[i]->AppendInstruction()->make_jump(); } return true; } Espresso::PSQ_LOAD_TYPE type = static_cast((knownGQRValue >> 0) & 0x7); sint32 scale = (knownGQRValue >> 8) & 0x3F; cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) if (scale != 0) return false; if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) { return false; } PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, type, readPS1, gprA, imm, fprDPS0, fprDPS1); return true; } void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) { cemu_assert_debug(!storePS1 || fprDPS1.IsValid()); if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) { ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS0, gprA, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); if(storePS1) { ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS1, gprA, imm + 4, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); } } else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) { bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); // todo - scaling if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); else PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 16, true); if(storePS1) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); // todo - scaling if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); else PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 2, 16, true); } } else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) { bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); else PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 8, true); if(storePS1) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); // todo - scaling if (isSigned) PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); else PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 1, 8, true); } } } // PSQ_ST and PSQ_STU bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) { int rA, frD; uint32 immUnused; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); uint32 imm = opcode & 0xFFF; if (imm & 0x800) imm |= ~0xFFF; sint32 gqrIndex = ((opcode >> 12) & 7); bool storePS1 = (opcode & 0x8000) == false; IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); DefinePS0(fprDPS0, frD); IMLReg fprDPS1 = storePS1 ? _GetFPRRegPS1(ppcImlGenContext, frD) : IMLREG_INVALID; if (withUpdate) { ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); imm = 0; } uint32 gqrValue = 0; if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue) ) { // generate complex dynamic handler when we dont know the GQR value ahead of time IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); // extract the load type from the GQR register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); IMLSegment* caseSegment[5]; sint32 compareValues[5] = {4, 5, 6, 7, 0}; // the last value is the default case PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5); for (sint32 i=0; i<5; i++) { IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, static_cast(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1); ppcImlGenContext->emitInst().make_jump(); // finalize case } return true; } Espresso::PSQ_LOAD_TYPE type = static_cast((gqrValue >> 16) & 0x7); sint32 scale = (gqrValue >> 24) & 0x3F; cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) { return false; } PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, type, storePS1, gprA, imm, fprDPS0, fprDPS1); return true; } // PS_MULS0 and PS_MULS1 bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frC; frC = (opcode>>6)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePSX(fprC, frC, isVariant1); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefineTempFPR(fprTmp0, 0); DefineTempFPR(fprTmp1, 1); // todo - optimize cases where a temporary is not necessary // todo - round fprC to 25bit accuracy // copy ps0 and ps1 to temporary ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp1, fprAps1); // multiply ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp0, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp1, fprC); // copy back to result ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } // PS_MADDS0 and PS_MADDS1 bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePSX(fprC, frC, isVariant1); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefineTempFPR(fprTmp0, 0); DefineTempFPR(fprTmp1, 1); // todo - round C to 25bit // todo - optimize cases where a temporary is not necessary // copy ps0 and ps1 to temporary ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp1, fprAps1); // multiply ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp0, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp1, fprC); // add ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTmp0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTmp1, fprBps1); // copy back to result ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 + hCPU->fpr[frB].fp1; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); if( frD == frA ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else if( frD == frB ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprAps1); } else { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 - hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 - hCPU->fpr[frB].fp1; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprAps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprAps1, fprBps1); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frC; frC = (opcode >> 6) & 0x1F; frA = (opcode >> 16) & 0x1F; frD = (opcode >> 21) & 0x1F; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp1, 1); // todo: Optimize for when a temporary isnt necessary // todo: Round to 25bit? ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); if (frD == frA) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); } else { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; frD = (opcode >> 21) & 0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 / hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 / hCPU->fpr[frB].fp1; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); if (frD == frA) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprDps1, fprBps1); } else { DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp1, 1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprAps1); // we divide temporary by frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp1, fprBps1); // copy result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp1, 1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction if( frD == frA && frD != frB ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else { // we multiply temporary by frA ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); // add frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); // copy result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp1, 1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction if( frD == frA && frD != frB ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else { // we multiply temporary by frA ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); // add frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); // copy result to frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // negate ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); // adjust accuracy //PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); // Splatoon requires that we emulate flush-to-denormals for this instruction //ppcImlGenContext->emitInst().make_fpr_r(NULL,PPCREC_IML_OP_FPR_ROUND_FLDN_TO_SINGLE_PRECISION_PAIR, fprRegisterD, false); return true; } bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = (hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); //hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 - hCPU->fpr[frB].fp1); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp1, 1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); // todo: This instruction can be optimized so that it doesn't always use a temporary register if( frD == frA && frD != frB ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); } else { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); DefineTempFPR(fprTemp0, 0); DefineTempFPR(fprTemp1, 1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); // todo: This instruction can be optimized so that it doesn't always use a temporary register if( frD == frA && frD != frB ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); } else { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // negate result ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; //float s0 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); //float s1 = (float)hCPU->fpr[frC].fp1; //hCPU->fpr[frD].fp0 = s0; //hCPU->fpr[frD].fp1 = s1; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprBps1, frB); DefinePS1(fprCps1, frC); if( frD == frA ) { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps1); } else { ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps1); } if (fprDps1 != fprCps1) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprCps1); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; //float s0 = (float)hCPU->fpr[frC].fp0; //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); //hCPU->fpr[frD].fp0 = s0; //hCPU->fpr[frD].fp1 = s1; DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); DefinePS0(fprAps0, frA); DefinePS1(fprBps1, frB); DefinePS0(fprCps0, frC); // todo - avoid temporaries when possible DefineTempFPR(fprTemp0, 0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); DefineTempFPR(fprTemp1, 1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); if (frB != frD) { // copy ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); } ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); return true; } bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprDps0); ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprDps1); return true; } bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = (float)(1.0f / (float)hCPU->fpr[frB].fp0); //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)hCPU->fpr[frB].fp1); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); // adjust accuracy PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps0); PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprDps1); return true; } bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; if( frB != frD ) { DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); } return true; } bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(fprAps0, frA); DefinePS1(fprAps1, frA); DefinePS0(fprBps0, frB); DefinePS1(fprBps1, frB); DefinePS0(fprCps0, frC); DefinePS1(fprCps1, frC); DefinePS0(fprDps0, frD); DefinePS1(fprDps1, frD); ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprDps0, fprAps0, fprBps0, fprCps0); ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprDps1, fprAps1, fprBps1, fprCps1); return true; } bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(frpAps0, frA); DefinePS0(frpBps0, frB); DefinePS0(frpDps0, frD); DefinePS1(frpDps1, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps0); if (frpDps0 != frpAps0) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps0); return true; } bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS0(frpAps0, frA); DefinePS1(frpBps1, frB); DefinePS0(frpDps0, frD); DefinePS1(frpDps1, frD); if (frpDps0 != frpAps0) ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps1); return true; } bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS1(frpAps1, frA); DefinePS0(frpBps0, frB); DefinePS0(frpDps0, frD); DefinePS1(frpDps1, frD); DefineTempFPR(frpTemp, 0); // todo - optimize cases where a temporary is not necessary ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpTemp, frpBps0); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpTemp); return true; } bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; DefinePS1(frpAps1, frA); DefinePS1(frpBps1, frB); DefinePS0(frpDps0, frD); DefinePS1(frpDps1, frD); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps1); return true; } bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { printf("PS_CMPO0: Not implemented\n"); return false; /* sint32 crfD, frA, frB; uint32 c=0; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; crfD = (opcode>>23)&0x7; IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; */ } bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; // DefinePS1(frpAps0, frA); // DefinePS1(frpBps0, frB); // ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCMPU_BOTTOM, frpAps0, frpBps0, crfD); //crfD >>= 2; DefinePS0(fprA, frA); DefinePS0(fprB, frB); IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); // todo: set fpscr return true; } bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; DefinePS1(fprA, frA); DefinePS1(fprB, frB); IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); return true; }