diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 09feb830..e18803c9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -609,7 +609,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); return false; } return true; @@ -635,7 +635,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } else { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); return false; } return true; @@ -894,7 +894,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); return false; } return true; diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 2d98522e..3886db41 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -3,8 +3,6 @@ #include "BackendX64.h" #include "Common/cpu_features.h" -#include "asm/x64util.h" // for recompiler_fres / frsqrte - uint32 _regF64(IMLReg physReg); uint32 _regI32(IMLReg r) @@ -34,231 +32,6 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } -void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR) -{ - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); - // extract scale field and multiply by 16 to get array offset - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (isLoad?16:0)+8-4); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (0x3F<<4)); - // multiply xmm by scale - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_RECDATA); - if (isLoad) - { - if(scalePS1) - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_ps1)); - else - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_1)); - } - else - { - if (scalePS1) - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_ps1)); - else - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_1)); - } -} - -// generate code for PSQ load for a particular type -// if scaleGQR is -1 then a scale of 1.0 is assumed (no scale) -void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID) -{ - if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1) - { - if (indexed) - { - assert_dbg(); - } - // optimized code for ps float load - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); - x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD - x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP); - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, registerXMM, registerXMM); - // note: floats are not scaled - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0) - { - if (indexed) - { - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg); - if (g_CPUFeatures.x86.movbe) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - } - } - else - { - if (g_CPUFeatures.x86.movbe) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - } - } - if (g_CPUFeatures.x86.avx) - { - x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); - } - else - { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP); - x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)); - } - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP); - // load constant 1.0 into lower half and upper half of temp register - x64Gen_movddup_xmmReg_memReg64(x64GenContext, registerXMM, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1)); - // overwrite lower half with single from memory - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerXMM, REG_RESV_FPR_TEMP); - // note: floats are not scaled - } - else - { - sint32 readSize; - bool isSigned = false; - if (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1) - { - readSize = 16; - isSigned = true; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1) - { - readSize = 16; - isSigned = false; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1) - { - readSize = 8; - isSigned = true; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1) - { - readSize = 8; - isSigned = false; - } - else - assert_dbg(); - - bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1); - for (sint32 wordIndex = 0; wordIndex < 2; wordIndex++) - { - if (indexed) - { - assert_dbg(); - } - // read from memory - if (wordIndex == 1 && loadPS1 == false) - { - // store constant 1 - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * 1, 1); - } - else - { - uint32 memOffset = memImmS32 + wordIndex * (readSize / 8); - if (readSize == 16) - { - // half word - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset); - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // endian swap - if (isSigned) - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - } - else if (readSize == 8) - { - // byte - x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset); - if (isSigned) - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - } - // store - x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * wordIndex, REG_RESV_TEMP); - } - } - // convert the two integers to doubles - x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext, registerXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR)); - // scale - if (registerGQR.IsValid()) - PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, true, loadPS1, registerGQR); - } -} - -void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR) -{ - bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1); - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); - // extract load type field - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); - // jump cases - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - // default case -> float - - // generate cases - uint32 jumpOffset_endOfFloat; - uint32 jumpOffset_endOfU8; - uint32 jumpOffset_endOfU16; - uint32 jumpOffset_endOfS8; - - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); -} - // load from memory bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { @@ -269,8 +42,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); uint8 mode = imlInstruction->op_storeLoad.mode; - if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0 ) // lazy hack for now. Load only one value for SINGLE_INTO_PS0 + if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0 ) { // load byte swapped single into temporary FPR if( indexed ) @@ -362,25 +134,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio } } } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 ) - { - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed); - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - { - PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); - } else { return false; @@ -388,188 +141,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio return true; } -void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID) -{ - bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1); - bool isFloat = mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; - if (registerGQR.IsValid()) - { - // move to temporary xmm and update registerXMM - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - registerXMM = REG_RESV_FPR_TEMP; - // apply scale - if(isFloat == false) - PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, false, storePS1, registerGQR); - } - if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0) - { - x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - if (g_CPUFeatures.x86.movbe == false) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - { - cemu_assert_debug(memReg != memRegEx); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); - } - if (g_CPUFeatures.x86.movbe) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP); - else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP); - if (indexed) - { - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); - } - return; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1) - { - if (indexed) - assert_dbg(); // todo - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD - x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); - x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - return; - } - // store as integer - // get limit from mode - sint32 clampMin, clampMax; - sint32 bitWriteSize; - if (mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ) - { - clampMin = -128; - clampMax = 127; - bitWriteSize = 8; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 ) - { - clampMin = 0; - clampMax = 255; - bitWriteSize = 8; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ) - { - clampMin = 0; - clampMax = 0xFFFF; - bitWriteSize = 16; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 ) - { - clampMin = -32768; - clampMax = 32767; - bitWriteSize = 16; - } - else - { - cemu_assert(false); - } - for (sint32 valueIndex = 0; valueIndex < (storePS1?2:1); valueIndex++) - { - // todo - multiply by GQR scale - if (valueIndex == 0) - { - // convert low half (PS0) to integer - x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, registerXMM); - } - else - { - // load top half (PS1) into bottom half of temporary register - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - // convert low half to integer - x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - } - // max(i, -clampMin) - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0); - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - // min(i, clampMax) - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0); - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - // endian swap - if( bitWriteSize == 16) - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); - // write to memory - if (indexed) - assert_dbg(); // unsupported - sint32 memOffset = memImmS32 + valueIndex * (bitWriteSize/8); - if (bitWriteSize == 8) - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP); - else if (bitWriteSize == 16) - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP); - } -} - -void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR) -{ - bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1); - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); - // extract store type field - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); - // jump cases - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - // default case -> float - - // generate cases - uint32 jumpOffset_endOfFloat; - uint32 jumpOffset_endOfU8; - uint32 jumpOffset_endOfU16; - uint32 jumpOffset_endOfS8; - - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); -} - // store to memory bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { @@ -646,40 +217,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); } } - else if(mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ) - { - cemu_assert_debug(imlInstruction->op_storeLoad.flags2.notExpanded == false); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed); - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - { - PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); - } else { - if( indexed ) - assert_dbg(); // todo debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode); return false; } return true; } -void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg) -{ - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, xmmReg, xmmReg, 1); -} - // FPR op FPR void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { @@ -701,93 +246,26 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) - { - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) - { - // VPUNPCKHQDQ - if (regR == regA) - { - // unpack top to bottom and top - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - //else if ( hasAVXSupport ) - //{ - // // unpack top to bottom and top with non-destructive destination - // // update: On Ivy Bridge this causes weird stalls? - // x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, registerResult, registerOperand, registerOperand); - //} - else - { - // move top to bottom - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, regR, regA); - // duplicate bottom - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR); - } - - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) + if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) { x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) - { - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) - { - if( regR != regA ) - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - _swapPS0PS1(x64GenContext, regR); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) - { - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 2); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) - { - // use unpckhpd here? - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 3); - _swapPS0PS1(x64GenContext, regR); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) { x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) - { - x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) { x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) - { - x64Gen_divpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) { x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) - { - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) - { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) { x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA); @@ -795,58 +273,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction // move to FPR register x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP); } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) - { - // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - - // call assembly routine to calculate accurate FRSQRTE result in XMM15 - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); - - // copy result to bottom of result register - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) - { - // copy register - if( regR != regA ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - } - // toggle sign bits - x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) - { - // copy register - if( regR != regA ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - } - // set sign bit to 0 - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) - { - // calculate bottom half of result - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); - else - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - - // calculate top half of result - // todo - this top to bottom copy can be optimized? - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, regA, 3); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom - - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); // copy bottom to top - } else { assert_dbg(); @@ -895,29 +321,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB); } } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) - { - // registerResult = registerOperandA - registerOperandB - if( regR == regA ) - { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB); - } - else if (g_CPUFeatures.x86.avx) - { - x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, regR, regA, regB); - } - else if( regR == regB ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB); - } - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { if( regR == regA ) @@ -950,39 +353,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB); uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) - { - // todo: Investigate if there are other optimizations possible if the operand registers overlap - // generic case - // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); - // 3) Interleave top of frTemp and frC - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC); - // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) - { - // todo: Investigate if there are other optimizations possible if the operand registers overlap - // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); - // 3) Copy bottom from frC - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC); - //// 4) Swap bottom and top half - //x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); - // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - - //float s0 = (float)hCPU->fpr[frC].fp0; - //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) + if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) { x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); @@ -997,38 +368,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) - { - // select bottom - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); - // select C bottom - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC); - sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - // select B bottom - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex()); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex()); - // select top - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // copy top to bottom (todo: May cause stall?) - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); - // select C top - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandC); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regC, 2); - sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - // select B top - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex()); - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandB); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regB, 2); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex()); - } else assert_dbg(); } @@ -1060,13 +399,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, // convert back to 64bit double x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) - { - // convert to 32bit singles - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, regR, regR); - // convert back to 64bit doubles - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, regR, regR); - } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) { // convert bottom to 64bit double diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 9d890d17..f858ea49 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -229,29 +229,11 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // determine partially written result switch (op_storeLoad.mode) { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR2 = op_storeLoad.registerGQR; - break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); registersUsed->readGPR2 = op_storeLoad.registerData; break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0: - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); break; default: cemu_assert_unimplemented(); @@ -269,28 +251,11 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // determine partially written result switch (op_storeLoad.mode) { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR3 = op_storeLoad.registerGQR; - break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); registersUsed->readGPR3 = op_storeLoad.registerData; break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0: - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); break; default: cemu_assert_unimplemented(); @@ -302,18 +267,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem; - // PSQ generic stores also access GQR - switch (op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR3 = op_storeLoad.registerGQR; - break; - default: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - break; - } } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { @@ -324,43 +277,14 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR2 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2.IsValid()) registersUsed->readGPR3 = op_storeLoad.registerMem2; - // PSQ generic stores also access GQR - switch (op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR4 = op_storeLoad.registerGQR; - break; - default: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - break; - } } else if (type == PPCREC_IML_TYPE_FPR_R_R) { // fpr operation - if (operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP || - operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP || - operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED || - operation == PPCREC_IML_OP_ASSIGN || - operation == PPCREC_IML_OP_FPR_NEGATE_PAIR || - operation == PPCREC_IML_OP_FPR_ABS_PAIR || - operation == PPCREC_IML_OP_FPR_FRES_PAIR || - operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) - { - // operand read, result written - registersUsed->readGPR1 = op_fpr_r_r.regA; - registersUsed->writtenGPR1 = op_fpr_r_r.regR; - } - else if ( + if ( operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM || - operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP || - operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP || - operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM || operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ || - operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT + operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) { // operand read, result read and (partially) written @@ -369,12 +293,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->writtenGPR1 = op_fpr_r_r.regR; } else if (operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM || - operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR || operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM || - operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR || operation == PPCREC_IML_OP_FPR_ADD_BOTTOM || - operation == PPCREC_IML_OP_FPR_ADD_PAIR || - operation == PPCREC_IML_OP_FPR_SUB_PAIR || operation == PPCREC_IML_OP_FPR_SUB_BOTTOM) { // operand read, result read and written @@ -383,14 +303,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->writtenGPR1 = op_fpr_r_r.regR; } - else if (operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM) - { - // operand read, result read - registersUsed->readGPR1 = op_fpr_r_r.regA; - registersUsed->readGPR2 = op_fpr_r_r.regR; - } else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT || operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT) { @@ -414,8 +326,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_IML_OP_FPR_SUB_BOTTOM: registersUsed->readGPR3 = op_fpr_r_r_r.regR; break; - case PPCREC_IML_OP_FPR_SUB_PAIR: - break; default: cemu_assert_unimplemented(); } @@ -433,10 +343,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_IML_OP_FPR_SELECT_BOTTOM: registersUsed->readGPR4 = op_fpr_r_r_r_r.regR; break; - case PPCREC_IML_OP_FPR_SUM0: - case PPCREC_IML_OP_FPR_SUM1: - case PPCREC_IML_OP_FPR_SELECT_PAIR: - break; default: cemu_assert_unimplemented(); } @@ -448,8 +354,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const operation == PPCREC_IML_OP_FPR_ABS_BOTTOM || operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM || operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM || - operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR) + operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM) { registersUsed->readGPR1 = op_fpr_r.regR; registersUsed->writtenGPR1 = op_fpr_r.regR; @@ -620,27 +525,23 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index cca5c362..23ad699b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -126,46 +126,22 @@ enum PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, // FPU + PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, + PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register PPCREC_IML_OP_FPR_ADD_BOTTOM, - PPCREC_IML_OP_FPR_ADD_PAIR, - PPCREC_IML_OP_FPR_SUB_PAIR, PPCREC_IML_OP_FPR_SUB_BOTTOM, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, - PPCREC_IML_OP_FPR_MULTIPLY_PAIR, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, - PPCREC_IML_OP_FPR_DIVIDE_PAIR, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched - PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half - PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated - PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated - PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated PPCREC_IML_OP_FPR_NEGATE_BOTTOM, - PPCREC_IML_OP_FPR_NEGATE_PAIR, PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) - PPCREC_IML_OP_FPR_ABS_PAIR, - PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy) - PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy) PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0) PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register) - PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision - PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A - PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A // Conversion (FPR_R_R) PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr - // PS - PPCREC_IML_OP_FPR_SUM0, - PPCREC_IML_OP_FPR_SUM1, - - PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register // R_R_R only @@ -297,38 +273,13 @@ enum { // fpr load PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, - PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U16_PS0, - PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1, + // fpr store PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0 PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0 PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0 - - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, }; struct IMLUsedRegisters @@ -468,7 +419,6 @@ struct IMLInstruction IMLReg registerData; IMLReg registerMem; IMLReg registerMem2; - IMLReg registerGQR; uint8 copyWidth; struct { @@ -476,7 +426,7 @@ struct IMLInstruction bool signExtend : 1; bool notExpanded : 1; // for floats }flags2; - uint8 mode; // transfer mode (copy width, ps0/ps1 behavior) + uint8 mode; // transfer mode sint32 immS32; }op_storeLoad; struct @@ -760,58 +710,48 @@ struct IMLInstruction // FPR // load from memory - void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) + void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian) { - if (registerGQR.IsValid()) - { - if ( mode == 0) - __debugbreak(); - } - this->type = PPCREC_IML_TYPE_FPR_LOAD; this->operation = 0; this->op_storeLoad.registerData = registerDestination; this->op_storeLoad.registerMem = registerMemory; - this->op_storeLoad.registerGQR = registerGQR; this->op_storeLoad.immS32 = immS32; this->op_storeLoad.mode = mode; this->op_storeLoad.flags2.swapEndian = switchEndian; } - void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) + void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian) { this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; this->operation = 0; this->op_storeLoad.registerData = registerDestination; this->op_storeLoad.registerMem = registerMemory1; this->op_storeLoad.registerMem2 = registerMemory2; - this->op_storeLoad.registerGQR = registerGQR; this->op_storeLoad.immS32 = 0; this->op_storeLoad.mode = mode; this->op_storeLoad.flags2.swapEndian = switchEndian; } // store to memory - void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) + void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian) { this->type = PPCREC_IML_TYPE_FPR_STORE; this->operation = 0; this->op_storeLoad.registerData = registerSource; this->op_storeLoad.registerMem = registerMemory; - this->op_storeLoad.registerGQR = registerGQR; this->op_storeLoad.immS32 = immS32; this->op_storeLoad.mode = mode; this->op_storeLoad.flags2.swapEndian = switchEndian; } - void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) + void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian) { this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; this->operation = 0; this->op_storeLoad.registerData = registerSource; this->op_storeLoad.registerMem = registerMemory1; this->op_storeLoad.registerMem2 = registerMemory2; - this->op_storeLoad.registerGQR = registerGQR; this->op_storeLoad.immS32 = immS32; this->op_storeLoad.mode = mode; this->op_storeLoad.flags2.swapEndian = switchEndian; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index d4d13b1f..d9f2878e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -90,21 +90,23 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI */ void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) { - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for (sint32 i = 0; i < segIt->imlList.size(); i++) - { - IMLInstruction* imlInstruction = segIt->imlList.data() + i; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); - } - } - } + cemuLog_logDebugOnce(LogType::Force, "IMLOptimizer_OptimizeDirectFloatCopies(): Currently disabled\n"); + return; + // for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + // { + // for (sint32 i = 0; i < segIt->imlList.size(); i++) + // { + // IMLInstruction* imlInstruction = segIt->imlList.data() + i; + // if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) + // { + // PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + // } + // else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) + // { + // PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + // } + // } + // } } void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg) @@ -224,118 +226,6 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 return true; } -/* - * If value of GQR can be predicted for a given PSQ load or store instruction then replace it with an optimized version - */ -void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) -{ - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for(IMLInstruction& instIt : segIt->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_FPR_LOAD || instIt.type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - if(instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 && - instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ) - continue; - // get GQR value - cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid()); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); - cemu_assert(gqrIndex >= 0); - if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) - continue; - uint32 gqrValue; - if (!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue)) - continue; - - uint32 formatType = (gqrValue >> 16) & 7; - uint32 scale = (gqrValue >> 24) & 0x3F; - if (scale != 0) - continue; // only generic handler supports scale - if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0; - else if (formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; - if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1; - else if (formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; - if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - } - else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - if(instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0 && - instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - continue; - // get GQR value - cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid()); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); - cemu_assert(gqrIndex >= 0 && gqrIndex < 8); - if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) - continue; - uint32 gqrValue; - if(!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue)) - continue; - uint32 formatType = (gqrValue >> 16) & 7; - uint32 scale = (gqrValue >> 24) & 0x3F; - if (scale != 0) - continue; // only generic handler supports scale - if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0; - else if (formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; - if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; - else if (formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; - if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - } - } - } -} - // analyses register dependencies across the entire function // per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten) class IMLOptimizerRegIOAnalysis diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 76264717..2a29d1e4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -311,10 +311,7 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) // this simplifies logic during register allocation PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); - // if GQRs can be predicted, optimize PSQ load/stores - PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - - // merge certain float load+store patterns (must happen before FPR register remapping) + // merge certain float load+store patterns IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext); // delay byte swapping for certain load+store patterns IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index eafd6957..05cfa818 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -12,9 +12,7 @@ IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit) #define DefinePS0(name, regIndex) IMLReg name = _GetFPRRegPS0(ppcImlGenContext, regIndex); #define DefinePS1(name, regIndex) IMLReg name = _GetFPRRegPS1(ppcImlGenContext, regIndex); - #define DefinePSX(name, regIndex, isPS1) IMLReg name = isPS1 ? _GetFPRRegPS1(ppcImlGenContext, regIndex) : _GetFPRRegPS0(ppcImlGenContext, regIndex); - #define DefineTempFPR(name, index) IMLReg name = _GetFPRTemp(ppcImlGenContext, index); IMLReg _GetFPRRegPS0(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) @@ -51,17 +49,6 @@ void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext assert_dbg(); } -/* - * Rounds pair of doubles to single precision (if single precision accuracy is emulated) - */ -void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) -{ - cemu_assert_suspicious(); // should not be used any longer - ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, fprRegister); - if( flushDenormals ) - assert_dbg(); -} - bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { sint32 rA, frD; @@ -250,7 +237,7 @@ bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod DefinePS0(fprD, frD); // move frA to frD (if different register) if( frD != frA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // multiply bottom double of frD with bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); return true; @@ -268,7 +255,7 @@ bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { DefineTempFPR(fprTemp, 0); // move frA to temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp, fprB); // move result to frD @@ -296,7 +283,7 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { DefineTempFPR(fprTemp, 0); // move frA to temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp, fprC); // add result to frD @@ -313,7 +300,7 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco } // move frA to frD (if different register) if( frD != frA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // always copy ps0 and ps1 + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // always copy ps0 and ps1 // multiply bottom double of frD with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); // add frB @@ -333,10 +320,10 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { // if frB is already in frD we need a temporary register to store the product of frA*frC DefineTempFPR(fprTemp, 0); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp, fprC); ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp, fprB); - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprTemp); return false; } if( frD == frC ) @@ -348,7 +335,7 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco } // move frA to frD if( frD != frA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // multiply bottom double of frD with bottom double of frC ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); // sub frB @@ -423,7 +410,7 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco DefinePS0(fprD, frD); // move frA to frD (if different register) if( frD != frA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // multiply bottom double of frD with bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC); // adjust accuracy @@ -445,7 +432,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { DefineTempFPR(fprTemp, 0); // move frA to temporary register - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp, fprB); // move result to frD @@ -457,7 +444,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco } // move frA to frD (if different register) if( frD != frA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // subtract bottom double of frB from bottom double of frD ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprD, fprB); // adjust accuracy @@ -483,7 +470,7 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco DefinePS0(fprD, frD); // move frA to frD (if different register) if( frD != frA ) - ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // add bottom double of frD and bottom double of frB ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprD, fprB); // adjust accuracy