From de542410c248b589d8310a9443d0045045d7c9ec Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 8 May 2025 03:48:22 +0200 Subject: [PATCH] PPCRec: Rework floating point instructions (#1554) --- src/CMakeLists.txt | 1 - src/Cafe/CMakeLists.txt | 1 - src/Cafe/HW/Espresso/EspressoISA.h | 12 + src/Cafe/HW/Espresso/PPCTimer.cpp | 1 - .../Recompiler/BackendX64/BackendX64.cpp | 18 +- .../Recompiler/BackendX64/BackendX64.h | 2 + .../Recompiler/BackendX64/BackendX64FPU.cpp | 728 +----- .../BackendX64/BackendX64GenFPU.cpp | 41 + .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 8 +- .../Recompiler/IML/IMLInstruction.cpp | 174 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 182 +- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 171 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 5 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 5 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 59 +- .../Recompiler/PPCRecompilerImlGen.cpp | 252 +- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 2103 ++++++++--------- src/asm/CMakeLists.txt | 53 - src/asm/stub.cpp | 1 - src/asm/x64util.h | 20 - src/asm/x64util_masm.asm | 233 -- src/asm/x64util_nasm.asm | 237 -- 22 files changed, 1428 insertions(+), 2879 deletions(-) delete mode 100644 src/asm/CMakeLists.txt delete mode 100644 src/asm/stub.cpp delete mode 100644 src/asm/x64util.h delete mode 100644 src/asm/x64util_masm.asm delete mode 100644 src/asm/x64util_nasm.asm diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 79471321..ee7f8610 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,7 +49,6 @@ add_subdirectory(audio) add_subdirectory(util) add_subdirectory(imgui) add_subdirectory(resource) -add_subdirectory(asm) add_executable(CemuBin main.cpp diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 34948f13..f4834260 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -548,7 +548,6 @@ else() endif() target_link_libraries(CemuCafe PRIVATE - CemuAsm 
CemuAudio CemuCommon CemuComponents diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h index e66e1424..5e09763b 100644 --- a/src/Cafe/HW/Espresso/EspressoISA.h +++ b/src/Cafe/HW/Espresso/EspressoISA.h @@ -10,6 +10,18 @@ namespace Espresso CR_BIT_INDEX_SO = 3, }; + enum class PSQ_LOAD_TYPE + { + TYPE_F32 = 0, + TYPE_UNUSED1 = 1, + TYPE_UNUSED2 = 2, + TYPE_UNUSED3 = 3, + TYPE_U8 = 4, + TYPE_U16 = 5, + TYPE_S8 = 6, + TYPE_S16 = 7, + }; + enum class PrimaryOpcode { // underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set) diff --git a/src/Cafe/HW/Espresso/PPCTimer.cpp b/src/Cafe/HW/Espresso/PPCTimer.cpp index c27c94ee..257973a6 100644 --- a/src/Cafe/HW/Espresso/PPCTimer.cpp +++ b/src/Cafe/HW/Espresso/PPCTimer.cpp @@ -1,5 +1,4 @@ #include "Cafe/HW/Espresso/Const.h" -#include "asm/x64util.h" #include "config/ActiveSettings.h" #include "util/helpers/fspinlock.h" #include "util/highresolutiontimer/HighResolutionTimer.h" diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 6a8aac2b..eadb80fb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -609,7 +609,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); return false; } return true; @@ -635,7 +635,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } else { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): 
Unsupported operation 0x%x\n", imlInstruction->operation); return false; } return true; @@ -894,7 +894,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); return false; } return true; @@ -1204,9 +1204,11 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) { auto regR = _regF64(imlInstruction->op_r_name.regR); - if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) + if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64)) { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); + sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2; + sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2; + x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double)); } else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) { @@ -1281,9 +1283,11 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, { auto regR = _regF64(imlInstruction->op_r_name.regR); uint32 name = imlInstruction->op_r_name.name; - if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32)) + if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64)) { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0)); + sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2; + sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2; + 
x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0)); } else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) { diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index e4d1f5a9..de415ca9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -205,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); +void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); @@ -230,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32); void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc); +void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, 
sint32 xmmRegisterDest, sint32 registerSrc); void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 4d9a538d..dc07f9d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -3,8 +3,6 @@ #include "BackendX64.h" #include "Common/cpu_features.h" -#include "asm/x64util.h" // for recompiler_fres / frsqrte - uint32 _regF64(IMLReg physReg); uint32 _regI32(IMLReg r) @@ -34,231 +32,6 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } -void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR) -{ - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); - // extract scale field and multiply by 16 to get array offset - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (isLoad?16:0)+8-4); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (0x3F<<4)); - // multiply xmm by scale - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_RECDATA); - if (isLoad) - { - if(scalePS1) - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_ps1)); - else - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_1)); - } - else - { - if (scalePS1) - 
x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_ps1)); - else - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_1)); - } -} - -// generate code for PSQ load for a particular type -// if scaleGQR is -1 then a scale of 1.0 is assumed (no scale) -void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID) -{ - if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1) - { - if (indexed) - { - assert_dbg(); - } - // optimized code for ps float load - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); - x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD - x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP); - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, registerXMM, registerXMM); - // note: floats are not scaled - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0) - { - if (indexed) - { - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg); - if (g_CPUFeatures.x86.movbe) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - } - } - else - { - if (g_CPUFeatures.x86.movbe) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, 
REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - } - } - if (g_CPUFeatures.x86.avx) - { - x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); - } - else - { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP); - x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)); - } - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP); - // load constant 1.0 into lower half and upper half of temp register - x64Gen_movddup_xmmReg_memReg64(x64GenContext, registerXMM, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1)); - // overwrite lower half with single from memory - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerXMM, REG_RESV_FPR_TEMP); - // note: floats are not scaled - } - else - { - sint32 readSize; - bool isSigned = false; - if (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1) - { - readSize = 16; - isSigned = true; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1) - { - readSize = 16; - isSigned = false; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1) - { - readSize = 8; - isSigned = true; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1) - { - readSize = 8; - isSigned = false; - } - else - assert_dbg(); - - bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1); - for (sint32 wordIndex = 0; wordIndex < 2; wordIndex++) - { - if (indexed) - { - assert_dbg(); - } - // read from memory - if (wordIndex == 1 && loadPS1 == false) - { - // store constant 1 - 
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * 1, 1); - } - else - { - uint32 memOffset = memImmS32 + wordIndex * (readSize / 8); - if (readSize == 16) - { - // half word - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset); - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // endian swap - if (isSigned) - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - } - else if (readSize == 8) - { - // byte - x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset); - if (isSigned) - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - } - // store - x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * wordIndex, REG_RESV_TEMP); - } - } - // convert the two integers to doubles - x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext, registerXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR)); - // scale - if (registerGQR.IsValid()) - PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, true, loadPS1, registerGQR); - } -} - -void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR) -{ - bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1); - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); - // extract load type field - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); - 
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); - // jump cases - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - // default case -> float - - // generate cases - uint32 jumpOffset_endOfFloat; - uint32 jumpOffset_endOfU8; - uint32 jumpOffset_endOfU16; - uint32 jumpOffset_endOfS8; - - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); -} - // load from memory bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { @@ -269,7 +42,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); uint8 mode = imlInstruction->op_storeLoad.mode; - if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ) + if( mode == PPCREC_FPR_LD_MODE_SINGLE ) { // load byte swapped single into temporary FPR if( indexed ) @@ -299,10 +72,9 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio else { x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM); - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM); - } + } } - else if( mode == PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0 ) + else if( mode == PPCREC_FPR_LD_MODE_DOUBLE ) { if( g_CPUFeatures.x86.avx ) { @@ -361,25 +133,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio } } } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == 
PPCREC_FPR_LD_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 ) - { - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed); - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - { - PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); - } else { return false; @@ -387,188 +140,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio return true; } -void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID) -{ - bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1); - bool isFloat = mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; - if (registerGQR.IsValid()) - { - // move to temporary xmm and update registerXMM - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - registerXMM = REG_RESV_FPR_TEMP; - // apply scale - if(isFloat == false) - PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, false, storePS1, registerGQR); - } - if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0) - { - 
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - if (g_CPUFeatures.x86.movbe == false) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - { - cemu_assert_debug(memReg != memRegEx); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); - } - if (g_CPUFeatures.x86.movbe) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP); - else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP); - if (indexed) - { - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); - } - return; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1) - { - if (indexed) - assert_dbg(); // todo - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD - x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); - x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - return; - } - // store as integer - // get limit from mode - sint32 clampMin, clampMax; - sint32 bitWriteSize; - if (mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ) - { - clampMin = -128; - clampMax = 127; - bitWriteSize = 8; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 ) - { - clampMin = 0; - clampMax = 255; - bitWriteSize = 8; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ) - { - clampMin = 0; - clampMax = 0xFFFF; - bitWriteSize = 16; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 ) - { - clampMin = -32768; - 
clampMax = 32767; - bitWriteSize = 16; - } - else - { - cemu_assert(false); - } - for (sint32 valueIndex = 0; valueIndex < (storePS1?2:1); valueIndex++) - { - // todo - multiply by GQR scale - if (valueIndex == 0) - { - // convert low half (PS0) to integer - x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, registerXMM); - } - else - { - // load top half (PS1) into bottom half of temporary register - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - // convert low half to integer - x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - } - // max(i, -clampMin) - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0); - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - // min(i, clampMax) - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0); - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - // endian swap - if( bitWriteSize == 16) - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); - // write to memory - if (indexed) - assert_dbg(); // unsupported - sint32 memOffset = memImmS32 + valueIndex * (bitWriteSize/8); - if (bitWriteSize == 8) - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP); - else if (bitWriteSize == 16) - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, 
REG_RESV_TEMP); - } -} - -void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR) -{ - bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1); - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR)); - // extract store type field - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); - // jump cases - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - // default case -> float - - // generate cases - uint32 jumpOffset_endOfFloat; - uint32 jumpOffset_endOfU8; - uint32 jumpOffset_endOfU16; - uint32 jumpOffset_endOfS8; - - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); -} - // store to memory bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) { @@ -578,7 +149,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti if( indexed ) realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); uint8 mode = imlInstruction->op_storeLoad.mode; - if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 ) + if( mode == PPCREC_FPR_ST_MODE_SINGLE ) { if (imlInstruction->op_storeLoad.flags2.notExpanded) { @@ -607,7 +178,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } } - else if( mode == PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0 ) + else if( mode == PPCREC_FPR_ST_MODE_DOUBLE ) { if( indexed ) { @@ -645,192 +216,61 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); } } - else if(mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 || - mode == 
PPCREC_FPR_ST_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ) - { - cemu_assert_debug(imlInstruction->op_storeLoad.flags2.notExpanded == false); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed); - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - { - PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR); - } else { - if( indexed ) - assert_dbg(); // todo debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode); return false; } return true; } -void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg) -{ - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, xmmReg, xmmReg, 1); -} - // FPR op FPR void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ) + { + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR); + uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA); + x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr); + return; + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ) + { + uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR); + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA); + x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr); + return; + } + uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); uint32 
regA = _regF64(imlInstruction->op_fpr_r_r.regA); - - if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) - { - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) - { - // VPUNPCKHQDQ - if (regR == regA) - { - // unpack top to bottom and top - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - //else if ( hasAVXSupport ) - //{ - // // unpack top to bottom and top with non-destructive destination - // // update: On Ivy Bridge this causes weird stalls? - // x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, registerResult, registerOperand, registerOperand); - //} - else - { - // move top to bottom - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, regR, regA); - // duplicate bottom - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR); - } - - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) + if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN ) { x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) - { - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) - { - if( regR != regA ) - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - _swapPS0PS1(x64GenContext, regR); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) - { - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 2); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) - { - // use unpckhpd here? 
- x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 3); - _swapPS0PS1(x64GenContext, regR); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY ) { x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) - { - x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE ) { x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) - { - x64Gen_divpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD ) { x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) - { - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) - { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB ) { x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA); } - else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ ) { x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA); x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); // move to FPR register x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP); } - else if (imlInstruction->operation == 
PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) - { - // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - - // call assembly routine to calculate accurate FRSQRTE result in XMM15 - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); - - // copy result to bottom of result register - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) - { - // copy register - if( regR != regA ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - } - // toggle sign bits - x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) - { - // copy register - if( regR != regA ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - } - // set sign bit to 0 - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) - { - // calculate bottom half of result - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); - else - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - - // calculate top half of result - // todo - this top to bottom copy can be optimized? 
- x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, regA, 3); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom - - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); // copy bottom to top - } else { assert_dbg(); @@ -846,7 +286,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA); uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB); - if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) + if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY) { if (regR == regA) { @@ -862,7 +302,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB); } } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM) + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD) { // todo: Use AVX 3-operand VADDSD if available if (regR == regA) @@ -879,30 +319,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB); } } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) - { - // registerResult = registerOperandA - registerOperandB - if( regR == regA ) - { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB); - } - else if (g_CPUFeatures.x86.avx) - { - x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, regR, regA, regB); - } - else if( regR == regB ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB); - } - } 
- else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB ) { if( regR == regA ) { @@ -934,39 +351,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB); uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) - { - // todo: Investigate if there are other optimizations possible if the operand registers overlap - // generic case - // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); - // 3) Interleave top of frTemp and frC - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC); - // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) - { - // todo: Investigate if there are other optimizations possible if the operand registers overlap - // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); - // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); - // 3) Copy bottom from frC - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC); - //// 4) Swap bottom and top half - //x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); - // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - 
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); - - //float s0 = (float)hCPU->fpr[frC].fp0; - //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) + if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT ) { x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); @@ -981,38 +366,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc // end PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) - { - // select bottom - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); - // select C bottom - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC); - sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - // select B bottom - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex()); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex()); - // select top - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // copy top to bottom (todo: May cause stall?) 
- x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); - // select C top - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandC); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regC, 2); - sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - // select B top - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex()); - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandB); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regB, 2); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex()); - } else assert_dbg(); } @@ -1021,15 +374,19 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, { uint32 regR = _regF64(imlInstruction->op_fpr_r.regR); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) + if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE ) { x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE ) + { + x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1)); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS ) { x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); } - else if( 
imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ) + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS ) { x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } @@ -1040,19 +397,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, // convert back to 64bit double x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR); } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) - { - // convert to 32bit singles - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, regR, regR); - // convert back to 64bit doubles - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, regR, regR); - } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64) { // convert bottom to 64bit double x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR); - // copy to top half - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR); } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp index 882820e2..4bbcc025 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp @@ -213,6 +213,37 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi } } +void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) +{ + // SSE2 + if( memRegister == X86_REG_RSP ) + { + // MOVSD , [RSP+] + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x10); + x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8); + 
x64Gen_writeU8(x64GenContext, 0x24); + x64Gen_writeU32(x64GenContext, memImmU32); + } + else if( memRegister == 15 ) + { + // MOVSD , [R15+] + x64Gen_writeU8(x64GenContext, 0x36); + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, memRegister, xmmRegister, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x10); + x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8); + x64Gen_writeU32(x64GenContext, memImmU32); + } + else + { + assert_dbg(); + } +} + void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE3 @@ -561,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7)); } +void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc) +{ + // SSE2 + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x2A); + x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7)); +} + void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc) { // SSE2 diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 07fd4002..f736c2a7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -189,9 +189,13 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di { strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); } - else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999)) + if (inst.op_r_name.name >= PPCREC_NAME_FPR_HALF && inst.op_r_name.name < (PPCREC_NAME_FPR_HALF + 32*2)) { - 
strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0); + strOutput.addFmt("f{}", inst.op_r_name.name - ((PPCREC_NAME_FPR_HALF - inst.op_r_name.name)/2)); + if ((inst.op_r_name.name-PPCREC_NAME_FPR_HALF)&1) + strOutput.add(".ps1"); + else + strOutput.add(".ps0"); } else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index cb481043..60b7c6ca 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -226,35 +226,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // address is in gpr register if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR1 = op_storeLoad.registerMem; - // determine partially written result - switch (op_storeLoad.mode) - { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR2 = op_storeLoad.registerGQR; - break; - case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: - // PS1 remains the same - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - registersUsed->readGPR2 = op_storeLoad.registerData; - break; - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - break; - default: - cemu_assert_unimplemented(); - } } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { @@ -265,34 +236,6 @@ 
void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem2; - // determine partially written result - switch (op_storeLoad.mode) - { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR3 = op_storeLoad.registerGQR; - break; - case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: - // PS1 remains the same - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - registersUsed->readGPR3 = op_storeLoad.registerData; - break; - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - break; - default: - cemu_assert_unimplemented(); - } } else if (type == PPCREC_IML_TYPE_FPR_STORE) { @@ -300,18 +243,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = op_storeLoad.registerData; if (op_storeLoad.registerMem.IsValid()) registersUsed->readGPR2 = op_storeLoad.registerMem; - // PSQ generic stores also access GQR - switch (op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR3 = op_storeLoad.registerGQR; - break; - default: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - break; - } } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { @@ -322,72 +253,34 @@ void 
IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR2 = op_storeLoad.registerMem; if (op_storeLoad.registerMem2.IsValid()) registersUsed->readGPR3 = op_storeLoad.registerMem2; - // PSQ generic stores also access GQR - switch (op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(op_storeLoad.registerGQR.IsValid()); - registersUsed->readGPR4 = op_storeLoad.registerGQR; - break; - default: - cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid()); - break; - } } else if (type == PPCREC_IML_TYPE_FPR_R_R) { // fpr operation - if (operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP || - operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP || - operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED || - operation == PPCREC_IML_OP_ASSIGN || - operation == PPCREC_IML_OP_FPR_NEGATE_PAIR || - operation == PPCREC_IML_OP_FPR_ABS_PAIR || - operation == PPCREC_IML_OP_FPR_FRES_PAIR || - operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) - { - // operand read, result written - registersUsed->readGPR1 = op_fpr_r_r.regA; - registersUsed->writtenGPR1 = op_fpr_r_r.regR; - } - else if ( - operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM || - operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP || - operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP || - operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM || - operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ || - operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT + if ( + operation == PPCREC_IML_OP_FPR_ASSIGN || + operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 || + operation == PPCREC_IML_OP_FPR_FCTIWZ ) { - // operand read, result read and (partially) written registersUsed->readGPR1 = op_fpr_r_r.regA; - registersUsed->readGPR2 = op_fpr_r_r.regR; registersUsed->writtenGPR1 = op_fpr_r_r.regR; } - else if (operation == 
PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM || - operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR || - operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM || - operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR || - operation == PPCREC_IML_OP_FPR_ADD_BOTTOM || - operation == PPCREC_IML_OP_FPR_ADD_PAIR || - operation == PPCREC_IML_OP_FPR_SUB_PAIR || - operation == PPCREC_IML_OP_FPR_SUB_BOTTOM) + else if (operation == PPCREC_IML_OP_FPR_MULTIPLY || + operation == PPCREC_IML_OP_FPR_DIVIDE || + operation == PPCREC_IML_OP_FPR_ADD || + operation == PPCREC_IML_OP_FPR_SUB) { - // operand read, result read and written registersUsed->readGPR1 = op_fpr_r_r.regA; registersUsed->readGPR2 = op_fpr_r_r.regR; registersUsed->writtenGPR1 = op_fpr_r_r.regR; } - else if (operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM) + else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT || + operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT) { - // operand read, result read + registersUsed->writtenGPR1 = op_fpr_r_r.regR; registersUsed->readGPR1 = op_fpr_r_r.regA; - registersUsed->readGPR2 = op_fpr_r_r.regR; } else cemu_assert_unimplemented(); @@ -398,19 +291,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR1 = op_fpr_r_r_r.regA; registersUsed->readGPR2 = op_fpr_r_r_r.regB; registersUsed->writtenGPR1 = op_fpr_r_r_r.regR; - // handle partially written result - switch (operation) - { - case PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM: - case PPCREC_IML_OP_FPR_ADD_BOTTOM: - case PPCREC_IML_OP_FPR_SUB_BOTTOM: - registersUsed->readGPR3 = op_fpr_r_r_r.regR; - break; - case PPCREC_IML_OP_FPR_SUB_PAIR: - break; - default: - cemu_assert_unimplemented(); - } } else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) { @@ -419,33 +299,23 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR2 = op_fpr_r_r_r_r.regB; registersUsed->readGPR3 = op_fpr_r_r_r_r.regC; 
registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR; - // handle partially written result - switch (operation) - { - case PPCREC_IML_OP_FPR_SELECT_BOTTOM: - registersUsed->readGPR4 = op_fpr_r_r_r_r.regR; - break; - case PPCREC_IML_OP_FPR_SUM0: - case PPCREC_IML_OP_FPR_SUM1: - case PPCREC_IML_OP_FPR_SELECT_PAIR: - break; - default: - cemu_assert_unimplemented(); - } } else if (type == PPCREC_IML_TYPE_FPR_R) { // fpr operation - if (operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM || - operation == PPCREC_IML_OP_FPR_ABS_BOTTOM || - operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM || - operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM || - operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR) + if (operation == PPCREC_IML_OP_FPR_NEGATE || + operation == PPCREC_IML_OP_FPR_ABS || + operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS || + operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 || + operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM) { registersUsed->readGPR1 = op_fpr_r.regR; registersUsed->writtenGPR1 = op_fpr_r.regR; } + else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE) + { + registersUsed->writtenGPR1 = op_fpr_r.regR; + } else cemu_assert_unimplemented(); } @@ -608,27 +478,23 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, 
translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); - op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 3ba0a1af..3b3898e9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -126,46 +126,22 @@ enum PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, // FPU - PPCREC_IML_OP_FPR_ADD_BOTTOM, - PPCREC_IML_OP_FPR_ADD_PAIR, - PPCREC_IML_OP_FPR_SUB_PAIR, - PPCREC_IML_OP_FPR_SUB_BOTTOM, - PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, - PPCREC_IML_OP_FPR_MULTIPLY_PAIR, - PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, - PPCREC_IML_OP_FPR_DIVIDE_PAIR, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination 
untouched - PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, - PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half - PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated - PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated - PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated - PPCREC_IML_OP_FPR_NEGATE_BOTTOM, - PPCREC_IML_OP_FPR_NEGATE_PAIR, - PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) - PPCREC_IML_OP_FPR_ABS_PAIR, - PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy) - PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy) - PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0) + PPCREC_IML_OP_FPR_ASSIGN, + PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register + PPCREC_IML_OP_FPR_ADD, + PPCREC_IML_OP_FPR_SUB, + PPCREC_IML_OP_FPR_MULTIPLY, + PPCREC_IML_OP_FPR_DIVIDE, + PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, // expand f32 to f64 in-place + PPCREC_IML_OP_FPR_NEGATE, + PPCREC_IML_OP_FPR_ABS, // abs(fpr) + PPCREC_IML_OP_FPR_NEGATIVE_ABS, // -abs(fpr) PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register) - PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision - PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, - PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, - PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A - PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A - // PS - PPCREC_IML_OP_FPR_SUM0, - PPCREC_IML_OP_FPR_SUM1, - - - // R_R_R only - - // R_R_S32 only + PPCREC_IML_OP_FPR_FCTIWZ, + PPCREC_IML_OP_FPR_SELECT, // selectively copy bottom value from operand B or C based on value in operand A + // Conversion (FPR_R_R) + PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr + 
PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr // R_R_R + R_R_S32 PPCREC_IML_OP_ADD, // also R_R_R_CARRY @@ -275,7 +251,7 @@ enum // IMLName PPCREC_NAME_TEMPORARY = 1000, PPCREC_NAME_R0 = 2000, PPCREC_NAME_SPR0 = 3000, - PPCREC_NAME_FPR0 = 4000, + PPCREC_NAME_FPR_HALF = 4800, // Counts PS0 and PS1 separately. E.g. fp3.ps1 is at offset 3 * 2 + 1 PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 PPCREC_NAME_XER_CA = 6000, // carry bit from XER PPCREC_NAME_XER_OV = 6001, // overflow bit from XER @@ -291,39 +267,14 @@ enum // IMLName enum { // fpr load - PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, - PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, - PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U16_PS0, - PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1, + PPCREC_FPR_LD_MODE_SINGLE, + PPCREC_FPR_LD_MODE_DOUBLE, + // fpr store - PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0 - PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0 + PPCREC_FPR_ST_MODE_SINGLE, + PPCREC_FPR_ST_MODE_DOUBLE, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0 - - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, }; struct IMLUsedRegisters @@ -463,7 +414,6 
@@ struct IMLInstruction IMLReg registerData; IMLReg registerMem; IMLReg registerMem2; - IMLReg registerGQR; uint8 copyWidth; struct { @@ -471,7 +421,7 @@ struct IMLInstruction bool signExtend : 1; bool notExpanded : 1; // for floats }flags2; - uint8 mode; // transfer mode (copy width, ps0/ps1 behavior) + uint8 mode; // transfer mode sint32 immS32; }op_storeLoad; struct @@ -752,6 +702,56 @@ struct IMLInstruction this->op_call_imm.regReturn = regReturn; } + // FPR + + // load from memory + void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_LOAD; + this->operation = 0; + this->op_storeLoad.registerData = registerDestination; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + + void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; + this->operation = 0; + this->op_storeLoad.registerData = registerDestination; + this->op_storeLoad.registerMem = registerMemory1; + this->op_storeLoad.registerMem2 = registerMemory2; + this->op_storeLoad.immS32 = 0; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + + // store to memory + void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_STORE; + this->operation = 0; + this->op_storeLoad.registerData = registerSource; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + + void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, 
uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; + this->operation = 0; + this->op_storeLoad.registerData = registerSource; + this->op_storeLoad.registerMem = registerMemory1; + this->op_storeLoad.registerMem2 = registerMemory2; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) { this->type = PPCREC_IML_TYPE_FPR_COMPARE; @@ -762,6 +762,44 @@ struct IMLInstruction this->op_fpr_compare.cond = cond; } + void make_fpr_r(sint32 operation, IMLReg registerResult) + { + // OP (fpr) + this->type = PPCREC_IML_TYPE_FPR_R; + this->operation = operation; + this->op_fpr_r.regR = registerResult; + } + + void make_fpr_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER) + { + // fpr OP fpr + this->type = PPCREC_IML_TYPE_FPR_R_R; + this->operation = operation; + this->op_fpr_r_r.regR = registerResult; + this->op_fpr_r_r.regA = registerOperand; + } + + void make_fpr_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) + { + // fpr = OP (fpr,fpr) + this->type = PPCREC_IML_TYPE_FPR_R_R_R; + this->operation = operation; + this->op_fpr_r_r_r.regR = registerResult; + this->op_fpr_r_r_r.regA = registerOperand1; + this->op_fpr_r_r_r.regB = registerOperand2; + } + + void make_fpr_r_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) + { + // fpr = OP (fpr,fpr,fpr) + this->type = PPCREC_IML_TYPE_FPR_R_R_R_R; + this->operation = operation; + this->op_fpr_r_r_r_r.regR = registerResult; + this->op_fpr_r_r_r_r.regA = registerOperandA; + this->op_fpr_r_r_r_r.regB = registerOperandB; + this->op_fpr_r_r_r_r.regC = registerOperandC; + } + /* X86 
specific */ void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index f2cf173a..d0348e5a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -34,8 +34,8 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI if (imlInstruction->IsSuffixInstruction()) break; // check if FPR is stored - if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0) || - (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0)) + if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE) || + (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE)) { if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex) { @@ -73,7 +73,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI { // insert expand instruction after store IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore); - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, _FPRRegFromID(fprIndex)); + newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex)); } } @@ -90,21 +90,23 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI */ void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) { - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for (sint32 i = 0; i < segIt->imlList.size(); i++) - { - IMLInstruction* imlInstruction = segIt->imlList.data() + i; - if 
(imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); - } - } - } + cemuLog_logDebugOnce(LogType::Force, "IMLOptimizer_OptimizeDirectFloatCopies(): Currently disabled\n"); + return; + // for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + // { + // for (sint32 i = 0; i < segIt->imlList.size(); i++) + // { + // IMLInstruction* imlInstruction = segIt->imlList.data() + i; + // if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) + // { + // PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + // } + // else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) + // { + // PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + // } + // } + // } } void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg) @@ -207,133 +209,22 @@ sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqr bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue) { - // UGQR 2 to 7 are initialized by the OS and we assume that games won't ever permanently touch those - // todo - hack - replace with more accurate solution - if (gqrIndex 
== 2) - gqrValue = 0x00040004; - else if (gqrIndex == 3) - gqrValue = 0x00050005; - else if (gqrIndex == 4) - gqrValue = 0x00060006; - else if (gqrIndex == 5) - gqrValue = 0x00070007; + // the default configuration is: + // UGQR0 = 0x00000000 + // UGQR2 = 0x00040004 + // UGQR3 = 0x00050005 + // UGQR4 = 0x00060006 + // UGQR5 = 0x00070007 + // but games are free to modify UGQR2 to UGQR7 it seems. + // no game modifies UGQR0 so it's safe enough to optimize for the default value + // Ideally we would do some kind of runtime tracking and second recompilation to create fast paths for PSQ_L/PSQ_ST but thats todo + if (gqrIndex == 0) + gqrValue = 0x00000000; else return false; return true; } -/* - * If value of GQR can be predicted for a given PSQ load or store instruction then replace it with an optimized version - */ -void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) -{ - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for(IMLInstruction& instIt : segIt->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_FPR_LOAD || instIt.type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - if(instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 && - instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ) - continue; - // get GQR value - cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid()); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); - cemu_assert(gqrIndex >= 0); - if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) - continue; - uint32 gqrValue; - if (!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue)) - continue; - - uint32 formatType = (gqrValue >> 16) & 7; - uint32 scale = (gqrValue >> 24) & 0x3F; - if (scale != 0) - continue; // only generic handler supports scale - if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0; - else if 
(formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; - if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1; - else if (formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; - if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - } - else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - if(instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0 && - instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - continue; - // get GQR value - cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid()); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR); - cemu_assert(gqrIndex >= 0 && gqrIndex < 8); - if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) - continue; - uint32 gqrValue; - if(!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue)) - continue; - uint32 formatType = (gqrValue >> 16) & 7; - uint32 scale = (gqrValue >> 24) & 0x3F; - if (scale != 0) - continue; // only generic handler supports scale - if (instIt.op_storeLoad.mode == 
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0; - else if (formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; - if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - { - if (formatType == 0) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; - else if (formatType == 4) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1; - else if (formatType == 5) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1; - else if (formatType == 6) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1; - else if (formatType == 7) - instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; - if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - instIt.op_storeLoad.registerGQR = IMLREG_INVALID; - } - } - } - } -} - // analyses register dependencies across the entire function // per segment this will generate information about which registers need to be preserved and which ones don't (e.g. 
are overwritten) class IMLOptimizerRegIOAnalysis diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index d411be14..5de1408b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -2093,7 +2093,10 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); if (imlSegment->HasSuffixInstruction()) { - cemu_assert_debug(!currentRange); // currentRange should be NULL? + if (currentRange) + { + cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress); + } for (auto& remainingRange : activeRanges) { cemu_assert_debug(!remainingRange->hasStore); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index be1846de..0dbc073b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -311,10 +311,7 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) // this simplifies logic during register allocation PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); - // if GQRs can be predicted, optimize PSQ load/stores - PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - - // merge certain float load+store patterns (must happen before FPR register remapping) + // merge certain float load+store patterns IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext); // delay byte swapping for certain load+store patterns IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 5d30267d..bfb2aed5 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -14,34 +14,20 @@ void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint3 void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index); void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint); -// GPR register management -IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); +// Register management +IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat); -// FPR register management -IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); -IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); +IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, IMLReg registerResult); // IML generation - FPU -bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, 
uint32 opcode); -bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble); +bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble); +bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble); +bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble); bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); @@ -67,22 +53,17 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* 
ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate); +bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate); +bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1); +bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1); bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative); bool 
PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); @@ -102,3 +83,19 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); + +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken); +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken); // no else segment +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex); + +class IMLRedirectInstOutput +{ +public: + IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment); + ~IMLRedirectInstOutput(); + + +private: + ppcImlGenContext_t* m_context; + IMLSegment* m_prevSegment; +}; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index a705baf8..e76a53fa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -87,8 +87,7 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex } // create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards -template -void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n 
genSegmentBranchTaken, F2n genSegmentBranchNotTaken) +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken) { IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); @@ -118,6 +117,122 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P basicBlockInfo.appendSegment = segMerge; } +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken) +{ + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 2); + IMLSegment* segBranchNotTaken = segments[0]; + IMLSegment* segMerge = segments[1]; + + // link the segments + segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(segMerge); + currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken); + segBranchNotTaken->SetLinkBranchNotTaken(segMerge); + // generate code for branch not taken segment + ppcImlGenContext.currentOutputSegment = segBranchNotTaken; + genSegmentBranchNotTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken); + // make merge segment the new write segment + ppcImlGenContext.currentOutputSegment = segMerge; + basicBlockInfo.appendSegment = segMerge; +} + +IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index); + +IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext) +{ + m_prevSegment = ppcImlGenContext->currentOutputSegment; + 
cemu_assert_debug(ppcImlGenContext->currentOutputSegment == ppcImlGenContext->currentBasicBlock->appendSegment); + if (outputSegment == ppcImlGenContext->currentOutputSegment) + { + m_prevSegment = nullptr; + return; + } + m_context->currentBasicBlock->appendSegment = outputSegment; + m_context->currentOutputSegment = outputSegment; +} + +IMLRedirectInstOutput::~IMLRedirectInstOutput() +{ + if (m_prevSegment) + { + m_context->currentBasicBlock->appendSegment = m_prevSegment; + m_context->currentOutputSegment = m_prevSegment; + } +} + +// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex) +{ + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction + + const sint32 numBranchSegments = count + 1; + const sint32 numCaseSegments = count; + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1); + IMLSegment** extraBranchSegments = segments.data(); + IMLSegment** caseSegments = segments.data() + numBranchSegments - 1; + IMLSegment* mergeSegment = segments[numBranchSegments - 1 + numCaseSegments]; + + // move links to the merge segment + mergeSegment->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + mergeSegment->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(nullptr); + currentWriteSegment->SetLinkBranchNotTaken(nullptr); + + for (sint32 i=0; iSetLinkBranchTaken(caseSegments[i]); + 
seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1)); + seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ); + seg->AppendInstruction()->make_conditional_jump(tmpBoolReg, true); + } + else + { + cemu_assert_debug(defaultCaseIndex < numCaseSegments); + seg->SetLinkBranchTaken(caseSegments[defaultCaseIndex]); + seg->AppendInstruction()->make_jump(); + } + } + // link case segments + for (sint32 i=0; iSetLinkBranchTaken(mergeSegment); + // -> Jumps are added after the instructions + } + else + { + seg->SetLinkBranchTaken(mergeSegment); + } + } + ppcImlGenContext.currentOutputSegment = mergeSegment; + basicBlockInfo.appendSegment = mergeSegment; +} + IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat) { auto it = ppcImlGenContext->mappedRegs.find(mappedName); @@ -212,32 +327,14 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index) return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); } -// get throw-away register. 
Only valid for the scope of a single translated instruction -// be careful to not collide with manually loaded temporary register +// get throw-away register +// be careful to not collide with other temporary register IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { cemu_assert_debug(index < 4); return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); } -/* - * Loads a PPC fpr into any of the available IML FPU registers - * If loadNew is false, it will check first if the fpr is already loaded into any IML register - */ -IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) -{ - return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); -} - -/* - * Checks if a PPC fpr register is already loaded into any IML register - * If not, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register) - */ -IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) -{ - return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64); -} - bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) { for (sint32 i = 0; i < 6; i++) @@ -1050,15 +1147,15 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load masked shift factor into temporary register ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmpShiftAmount, regB, 0x3F); - ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 32, regTmpCondBool, IMLCondition::UNSIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 31, regTmpCondBool, IMLCondition::UNSIGNED_GT); ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, 
[&](ppcImlGenContext_t& genCtx) { - /* branch taken */ - genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, regTmpShiftAmount); - genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry + /* branch taken, shift size 32 or above */ + genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, 31); // shift the sign bit into all the bits + genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ); }, [&](ppcImlGenContext_t& genCtx) { @@ -1073,6 +1170,8 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, regTmpShiftAmount); } ); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1909,23 +2008,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 12: // multiply scalar - if (PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext, opcode) == false) + case 12: // PS_MULS0 + if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 13: // multiply scalar - if (PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext, opcode) == false) + case 13: // PS_MULS1 + if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 14: // multiply add scalar - if (PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext, opcode) == false) + case 14: // PS_MADDS0 + if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 15: // multiply add scalar - if 
(PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext, opcode) == false) + case 15: // PS_MADDS1 + if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -1992,22 +2091,22 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 28: // multiply sub paired - if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode) == false) + case 28: // PS_MSUB + if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 29: // multiply add paired + case 29: // PS_MADD if (PPCRecompilerImlGen_PS_MADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 30: // negative multiply sub paired - if (PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext, opcode) == false) + case 30: // PS_NMSUB + if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 31: // negative multiply add paired + case 31: // PS_NMADD if (PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; @@ -2339,8 +2438,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 534: // LWBRX PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false); break; - case 535: - if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false) + case 535: // LFSX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2348,8 +2447,8 @@ bool 
PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_SRW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 567: - if (PPCRecompilerImlGen_LFSUX(ppcImlGenContext, opcode) == false) + case 567: // LFSUX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2360,13 +2459,13 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 598: PPCRecompilerImlGen_SYNC(ppcImlGenContext, opcode); break; - case 599: - if (PPCRecompilerImlGen_LFDX(ppcImlGenContext, opcode) == false) + case 599: // LFDX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 631: - if (PPCRecompilerImlGen_LFDUX(ppcImlGenContext, opcode) == false) + case 631: // LFDUX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2374,20 +2473,24 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, false, false)) unsupportedInstructionFound = true; break; - case 663: - if (PPCRecompilerImlGen_STFSX(ppcImlGenContext, opcode) == false) + case 663: // STFSX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; break; - case 695: - if (PPCRecompilerImlGen_STFSUX(ppcImlGenContext, opcode) == false) + case 695: // STFSUX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; break; case 725: if (PPCRecompilerImlGen_STSWI(ppcImlGenContext, opcode) == 
false) unsupportedInstructionFound = true; break; - case 727: - if (PPCRecompilerImlGen_STFDX(ppcImlGenContext, opcode) == false) + case 727: // STFDX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, true) == false) + unsupportedInstructionFound = true; + break; + case 759: // STFDUX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, true) == false) unsupportedInstructionFound = true; break; case 790: // LHBRX @@ -2488,53 +2591,53 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 47: PPCRecompilerImlGen_STMW(ppcImlGenContext, opcode); break; - case 48: - if (PPCRecompilerImlGen_LFS(ppcImlGenContext, opcode) == false) + case 48: // LFS + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 49: - if (PPCRecompilerImlGen_LFSU(ppcImlGenContext, opcode) == false) + case 49: // LFSU + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 50: - if (PPCRecompilerImlGen_LFD(ppcImlGenContext, opcode) == false) + case 50: // LFD + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 51: - if (PPCRecompilerImlGen_LFDU(ppcImlGenContext, opcode) == false) + case 51: // LFDU + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 52: - if (PPCRecompilerImlGen_STFS(ppcImlGenContext, opcode) == false) + case 52: // STFS + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; 
ppcImlGenContext->hasFPUInstruction = true; break; - case 53: - if (PPCRecompilerImlGen_STFSU(ppcImlGenContext, opcode) == false) + case 53: // STFSU + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 54: - if (PPCRecompilerImlGen_STFD(ppcImlGenContext, opcode) == false) + case 54: // STFD + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 55: - if (PPCRecompilerImlGen_STFDU(ppcImlGenContext, opcode) == false) + case 55: // STFDU + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 56: - if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 57: - if (PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2587,12 +2690,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) } break; case 60: - if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 61: - if (PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -2702,7 +2805,6 @@ bool 
PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) } // returns false if code flow is not interrupted -// continueDefaultPath: Controls if bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget) { hasBranchTarget = false; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index 96a7b560..7eb8a4b6 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -3,176 +3,82 @@ #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" +#include "IML/IML.h" ATTR_MS_ABI double frsqrte_espresso(double input); ATTR_MS_ABI double fres_espresso(double input); IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) +#define DefinePS0(name, regIndex) IMLReg name = _GetFPRRegPS0(ppcImlGenContext, regIndex); +#define DefinePS1(name, regIndex) IMLReg name = _GetFPRRegPS1(ppcImlGenContext, regIndex); +#define DefinePSX(name, regIndex, isPS1) IMLReg name = isPS1 ? 
_GetFPRRegPS1(ppcImlGenContext, regIndex) : _GetFPRRegPS0(ppcImlGenContext, regIndex); +#define DefineTempFPR(name, index) IMLReg name = _GetFPRTemp(ppcImlGenContext, index); + +IMLReg _GetFPRRegPS0(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { - // load from memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerDestination; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + 0, IMLRegFormat::F64); } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) +IMLReg _GetFPRRegPS1(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { - // load from memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerDestination; - imlInstruction->op_storeLoad.registerMem = registerMemory1; - imlInstruction->op_storeLoad.registerMem2 = registerMemory2; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = 0; - imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(regIndex < 32); + return 
PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + 1, IMLRegFormat::F64); } -void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) +IMLReg _GetFPRTemp(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { - // store to memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerSource; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(index < 4); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + index, IMLRegFormat::F64); } -void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID) +IMLReg _GetFPRReg(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex, bool selectPS1) { - // store to memory - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerSource; - imlInstruction->op_storeLoad.registerMem = registerMemory1; - imlInstruction->op_storeLoad.registerMem2 = registerMemory2; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = immS32; - 
imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + (selectPS1 ? 1 : 0), IMLRegFormat::F64); } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER) +void PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) { - // fpr OP fpr - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r.regR = registerResult; - imlInstruction->op_fpr_r_r.regA = registerOperand; -} - -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) -{ - // fpr = OP (fpr,fpr) - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r_r.regR = registerResult; - imlInstruction->op_fpr_r_r_r.regA = registerOperand1; - imlInstruction->op_fpr_r_r_r.regB = registerOperand2; -} - -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) -{ - // fpr = OP (fpr,fpr,fpr) - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = 
PPCREC_IML_TYPE_FPR_R_R_R_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r_r_r.regR = registerResult; - imlInstruction->op_fpr_r_r_r_r.regA = registerOperandA; - imlInstruction->op_fpr_r_r_r_r.regB = registerOperandB; - imlInstruction->op_fpr_r_r_r_r.regC = registerOperandC; -} - -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, IMLReg registerResult) -{ - // OP (fpr) - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r.regR = registerResult; -} - -/* - * Rounds the bottom double to single precision (if single precision accuracy is emulated) - */ -void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) -{ - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL, PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); if( flushDenormals ) assert_dbg(); } -/* - * Rounds pair of doubles to single precision (if single precision accuracy is emulated) - */ -void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) -{ - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL, PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, fprRegister); - if( flushDenormals ) - assert_dbg(); -} - -bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { sint32 rA, frD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, 
imm); - // get memory gpr register index IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) + if (withUpdate) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); + // add imm to memory register + cemu_assert_debug(rA != 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); + imm = 0; // set imm to 0 so we dont add it twice + } + DefinePS0(fpPs0, frD); + if (isDouble) + { + // LFD/LFDU + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE, true); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + // LFS/LFSU + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE, true); + if( ppcImlGenContext->LSQE ) + { + DefinePS1(fpPs1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fpPs1, fpPs0); + } } return true; } -bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, 
gprRegister, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); - } - return true; -} - -bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { sint32 rA, frD, rB; PPC_OPC_TEMPL_X(opcode, frD, rA, rB); @@ -184,148 +90,51 @@ bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr registers IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) + if (withUpdate) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); + DefinePS0(fpPs0, frD); + if (isDouble) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); + if (withUpdate) + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE, true); + else + ppcImlGenContext->emitInst().make_fpr_r_memory_indexed(fpPs0, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE, true); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + if (withUpdate) + ppcImlGenContext->emitInst().make_fpr_r_memory( fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE, true); + else + ppcImlGenContext->emitInst().make_fpr_r_memory_indexed( fpPs0, gprRegister1, 
gprRegister2, PPCREC_FPR_LD_MODE_SINGLE, true); + if( ppcImlGenContext->LSQE ) + { + DefinePS1(fpPs1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fpPs1, fpPs0); + } } return true; } -bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { - sint32 rA, frD, rB; - PPC_OPC_TEMPL_X(opcode, frD, rA, rB); - if( rA == 0 ) + sint32 rA, frD; + uint32 imm; + PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fpPs0, frD); + if (withUpdate) { - debugBreakpoint(); - return false; - } - // get memory gpr registers - IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // add rB to rA (if rA != 0) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); + imm = 0; } + if (isDouble) + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE, true); else - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); - } + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE, true); return true; } -bool 
PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - assert_dbg(); - } - // get memory gpr register index - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - assert_dbg(); - } - // get memory gpr register index - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // emit load iml - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD, rB; - PPC_OPC_TEMPL_X(opcode, frD, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr registers - IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD, rB; - PPC_OPC_TEMPL_X(opcode, frD, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // add rB to rA (if rA != 0) - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // get fpr register index - IMLReg 
fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble) { sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); @@ -337,101 +146,25 @@ bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // get memory gpr registers IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); + if (hasUpdate) + { + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); + } + DefinePS0(fpPs0, frS); + auto mode = isDouble ? 
PPCREC_FPR_ST_MODE_DOUBLE : PPCREC_FPR_ST_MODE_SINGLE; if( ppcImlGenContext->LSQE ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); + if (hasUpdate) + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); + else + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - } - return true; -} - - -bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frS, rB; - PPC_OPC_TEMPL_X(opcode, frS, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr registers - IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); - // calculate EA in rA - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr register index - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // get fpr register index - IMLReg fprRegister = 
PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr register index - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // add imm to memory register - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, frS, rB; - PPC_OPC_TEMPL_X(opcode, frS, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr registers - IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); - if( ppcImlGenContext->LSQE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, 
true); + if (hasUpdate) + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); + else + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } return true; } @@ -440,7 +173,7 @@ bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); - // get memory gpr registers + DefinePS0(fpPs0, frS); IMLReg gprRegister1; IMLReg gprRegister2; if( rA != 0 ) @@ -454,12 +187,10 @@ bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); gprRegister2 = IMLREG_INVALID; } - // get fpr register index - IMLReg fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); else - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); return true; } @@ -468,13 +199,10 @@ bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprA, fprB); return true; } @@ -483,13 +211,10 @@ bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // subtract bottom double of frB from bottom double of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB); return true; } @@ -504,15 +229,14 @@ bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod frA = frC; frC = temp; } - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( 
frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); return true; } @@ -521,27 +245,25 @@ bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp, fprB); // move result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return true; } 
// move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); // copy ps0 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // copy ps0 // divide bottom double of frD by bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); return true; } @@ -549,38 +271,37 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, 
PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); // add result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - IMLReg temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // always copy ps0 and ps1 // multiply bottom double of frD with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); return true; } @@ -588,32 +309,34 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = 
PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // if frB is already in frD we need a temporary register to store the product of frA*frC + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); if( frB == frD ) { - // not implemented + // if frB is already in frD we need a temporary register to store the product of frA*frC + DefineTempFPR(fprTemp, 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp, fprB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return false; } - // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - IMLReg temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } - // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + // move frA to frD + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterB); + 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprB); return true; } @@ -621,51 +344,52 @@ bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { - // hCPU->fpr[frD].fpr = -(hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr - hCPU->fpr[frD].fpr); - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - //// negate frB/frD - //PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD, true); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); // sub frB from temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, 
PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp, fprB); // negate result - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprTemp); // move result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - IMLReg temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprB); // negate result - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); return true; } +#define 
PSE_CopyResultToPs1() if( ppcImlGenContext->PSE ) \ + { \ + DefinePS1(fprDPS1, frD); \ + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDPS1, fprD); \ + } + bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB_unused, frC; @@ -678,24 +402,18 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frA = frC; frC = temp; } - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 - + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } - + PSE_CopyResultToPs1(); return true; } @@ -704,44 +422,31 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 
opco sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); - /*hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr / hCPU->fpr[frB].fpr); - if( hCPU->PSE ) - hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;*/ - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp, fprB); // move result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + 
PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // subtract bottom double of frB from bottom double of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -757,22 +462,17 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frA = frB; frB = temp; } - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // add bottom double of frD and bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -781,20 +481,12 @@ bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frB==0); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // subtract bottom - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); - // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, 
fprB); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -802,34 +494,26 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - //FPRD(RD) = FPRD(RA) * FPRD(RC) + FPRD(RB); - //hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr + hCPU->fpr[frB].fpr; - //if( hCPU->PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - IMLReg fprRegisterTemp; + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + IMLReg fprRegisterTemp; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterTemp, fprRegisterB); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); + 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprRegisterTemp, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) + if( fprD != fprRegisterTemp ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); } + PSE_CopyResultToPs1(); return true; } @@ -837,33 +521,27 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - //hCPU->fpr[frD].fp0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //if( hCPU->PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); + IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + 
if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) + if( fprD != fprRegisterTemp ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); } + PSE_CopyResultToPs1(); return true; } @@ -871,70 +549,32 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - - //[FP1(RD) = ]FP0(RD) = -(FP0(RA) * FP0(RC) - FP0(RB)); - //hCPU->fpr[frD].fp0 = (float)-(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //if( PPC_PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, 
PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterTemp); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprRegisterTemp); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, 
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); - } + if( fprD != fprRegisterTemp ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); + PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("FCMPO: Not implemented\n"); + // Not implemented return false; - - //sint32 crfD, frA, frB; - //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - //crfD >>= 2; - //IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - //IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - - //IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); - //IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); - //IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); - //IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); - - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); - //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); - - // todo - set fpscr - - //sint32 crfD, frA, frB; - //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - //crfD >>= 2; - //uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - //uint32 fprRegisterB = 
PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - //PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); - return true; } bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -942,18 +582,18 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 crfD, frA, frB; PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); crfD >>= 2; - IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); - ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); // todo: set fpscr @@ -964,9 +604,9 @@ bool 
PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 frD, rA, frB; PPC_OPC_TEMPL_X(opcode, frD, rA, frB); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); return true; } @@ -975,14 +615,11 @@ bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // abs frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ABS_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprD); return true; } @@ -991,14 +628,11 @@ bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = 
PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // abs frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATIVE_ABS, fprD); return true; } @@ -1007,15 +641,12 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // copy result to top - if( ppcImlGenContext->PSE ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -1024,15 +655,12 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); 
PPC_ASSERT(frA==0); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( fprRegisterD != fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - } - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegisterD); - if( ppcImlGenContext->PSE ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( fprD != fprB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprD); + PSE_CopyResultToPs1(); return true; } @@ -1042,17 +670,12 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); if( opcode&PPC_OPC_RC ) - { return false; - } - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // negate frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + 
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); return true; } @@ -1064,11 +687,11 @@ bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { return false; } - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SELECT_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprD, fprA, fprB, fprC); return true; } @@ -1076,12 +699,11 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); return true; } @@ -1089,237 +711,332 @@ bool 
PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCTIWZ, fprD, fprB); return true; } -bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue); + +void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg, sint32 clampMin, sint32 clampMax) +{ + IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + // min(reg, clampMax) + ppcImlGenContext->emitInst().make_compare_s32(reg, clampMax, regTmpCondBool, IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false); // condition needs to be inverted because we skip if the condition is true + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax); + } + ); + // max(reg, clampMin) + ppcImlGenContext->emitInst().make_compare_s32(reg, clampMin, regTmpCondBool, IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false); + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, 
clampMin); + } + ); +} + +void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) +{ + if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) + { + ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS0, gprA, imm, PPCREC_FPR_LD_MODE_SINGLE, true); + if(readPS1) + { + ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS1, gprA, imm + 4, PPCREC_FPR_LD_MODE_SINGLE, true); + } + } + if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) + { + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 16, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + if(readPS1) + { + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 2, 16, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + } + } + else if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) + { + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 8, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + if(readPS1) + { + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 1, 8, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + } + } +} + +// PSQ_L and PSQ_LU +bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) { int rA, frD; uint32 immUnused; 
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - sint32 gqrIndex = ((opcode >> 12) & 7); uint32 imm = opcode & 0xFFF; if (imm & 0x800) imm |= ~0xFFF; - bool readPS1 = (opcode & 0x8000) == false; - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // psq load - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, readPS1 ? PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; -} + IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fprDPS0, frD); + DefinePS1(fprDPS1, frD); + if (!readPS1) + { + // if PS1 is not explicitly read then set it to 1.0 + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_LOAD_ONE, fprDPS1); + } + if (withUpdate) + { + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); + imm = 0; + } + uint32 knownGQRValue = 0; + if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, knownGQRValue) ) + { + // generate complex dynamic handler when we dont know the GQR value ahead of time + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // extract the load type from the GQR register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); + IMLSegment* caseSegment[6]; + sint32 compareValues[6] = {0, 4, 5, 6, 7}; + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0); + for (sint32 i=0; 
i<5; i++) + { + IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1); + // create the case jump instructions here because we need to add it last + caseSegment[i]->AppendInstruction()->make_jump(); + } + return true; + } -bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, frD; - uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - if (rA == 0) + Espresso::PSQ_LOAD_TYPE type = static_cast<Espresso::PSQ_LOAD_TYPE>((knownGQRValue >> 0) & 0x7); + sint32 scale = (knownGQRValue >> 8) & 0x3F; + cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) + if (scale != 0) return false; - sint32 gqrIndex = ((opcode >> 12) & 7); - uint32 imm = opcode & 0xFFF; - if (imm & 0x800) - imm |= ~0xFFF; - - bool readPS1 = (opcode & 0x8000) == false; - - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); - - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired load - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, readPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; -} - -bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, frD; - uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - uint32 imm = opcode & 0xFFF; - if (imm & 0x800) - imm |= ~0xFFF; - sint32 gqrIndex = ((opcode >> 12) & 7); - - bool storePS1 = (opcode & 0x8000) == false; - - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired store - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; -} - -bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, frD; - uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - if (rA == 0) + if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) + { return false; + } + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, type, readPS1, gprA, imm, fprDPS0, fprDPS1); + return true; +} + +void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) +{ + cemu_assert_debug(!storePS1 || fprDPS1.IsValid()); + if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) + { + ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS0, gprA, imm, PPCREC_FPR_ST_MODE_SINGLE, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS1, 
gprA, imm + 4, PPCREC_FPR_ST_MODE_SINGLE, true); + } + } + else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) + { + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); + // todo - scaling + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 16, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); + // todo - scaling + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 2, 16, true); + } + } + else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) + { + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0); + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 8, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1); + // todo - scaling + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); + 
ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 1, 8, true); + } + } +} + +// PSQ_ST and PSQ_STU +bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) +{ + int rA, frD; + uint32 immUnused; + PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); uint32 imm = opcode & 0xFFF; if (imm & 0x800) imm |= ~0xFFF; sint32 gqrIndex = ((opcode >> 12) & 7); - bool storePS1 = (opcode & 0x8000) == false; - IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); - IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); + IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fprDPS0, frD); + IMLReg fprDPS1 = storePS1 ? _GetFPRRegPS1(ppcImlGenContext, frD) : IMLREG_INVALID; - IMLReg fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired store - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); + if (withUpdate) + { + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); + imm = 0; + } + + uint32 gqrValue = 0; + if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue) ) + { + // generate complex dynamic handler when we dont know the GQR value ahead of time + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // extract the load type from the GQR register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); + + IMLSegment* caseSegment[5]; + sint32 compareValues[5] = {0, 4, 5, 6, 7}; + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0); + for (sint32 i=0; i<5; i++) + { + IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1); + ppcImlGenContext->emitInst().make_jump(); // finalize case + } + return true; + } + + Espresso::PSQ_LOAD_TYPE type = static_cast<Espresso::PSQ_LOAD_TYPE>((gqrValue >> 16) & 0x7); + sint32 scale = (gqrValue >> 24) & 0x3F; + cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) + + if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) + { + return false; + } + + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, type, storePS1, gprA, imm, fprDPS0, fprDPS1); return true; } -bool 
PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MULS0 and PS_MULS1 +bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frC; frC = (opcode>>6)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA we can multiply frD immediately and safe a copy instruction - if( frD == frA ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePSX(fprC, frC, isVariant1); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + DefineTempFPR(fprTmp0, 0); + DefineTempFPR(fprTmp1, 1); + + // todo - optimize cases where a temporary is not necessary + // todo - round fprC to 25bit accuracy + + 
// copy ps0 and ps1 to temporary + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp1, fprAps1); + + // multiply + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp0, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp1, fprC); + + // copy back to result + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); + + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); + return true; } -bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frC; - frC = (opcode>>6)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA we can multiply frD immediately and safe a copy instruction - if( frD == frA ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, 
PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} - -bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MADDS0 and PS_MADDS1 +bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); - //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} -bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frB, frC; - frC = (opcode>>6)&0x1F; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp0); - //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp1 in bottom and top half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA and frD != frB we can multiply frD immediately and safe a 
copy instruction - if( frD == frA && frD != frB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePSX(fprC, frC, isVariant1); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + DefineTempFPR(fprTmp0, 0); + DefineTempFPR(fprTmp1, 1); + + // todo - round C to 25bit + // todo - optimize cases where a temporary is not necessary + + // copy ps0 and ps1 to temporary + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp1, fprAps1); + + // multiply + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp0, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp1, fprC); + + // add + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTmp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTmp1, fprBps1); + + // copy back to result + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); + 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); + + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1331,25 +1048,34 @@ bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 + hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + if( frD == frA ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else if( frD == frB ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprAps1); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterA); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, 
fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1361,13 +1087,20 @@ bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 - hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 - hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterA, fprRegisterB); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprAps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprAps1, fprBps1); + // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1377,28 +1110,37 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* 
ppcImlGenContext, uint32 opc frC = (opcode >> 6) & 0x1F; frA = (opcode >> 16) & 0x1F; frD = (opcode >> 21) & 0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // we need a temporary register - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA we can multiply frD immediately and safe a copy instruction + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + // todo: Optimize for when a temporary isnt necessary + // todo: Round to 25bit? 
+ + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); if (frD == frA) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); } else { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1410,28 +1152,35 @@ bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode >> 21) & 0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 / hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 / hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - IMLReg 
fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA we can divide frD immediately and safe a copy instruction + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + if (frD == frA) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprDps1, fprBps1); } else { - // we need a temporary register - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterA); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprAps1); // we divide temporary by frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_PAIR, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp1, fprBps1); // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // 
adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1445,33 +1194,61 @@ bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 op //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction - if( frD == frA && frD != frB ) + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + if (frD != frA && frD != frB) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + if (frD == frC) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1); + } + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); + if( frD == frA && frD != frB ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); + } + 
else + { + // we multiply temporary by frA + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + // add frB + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); + // copy result to frD + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + } } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1483,81 +1260,54 @@ bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + 
DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction + // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction if( frD == frA && frD != frB ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else { // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, 
PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } + // negate - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); // adjust accuracy //PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); // Splatoon requires that we emulate flush-to-denormals for this instruction - //PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_FLDN_TO_SINGLE_PRECISION_PAIR, fprRegisterD, false); + //ppcImlGenContext->emitInst().make_fpr_r(NULL,PPCREC_IML_OP_FPR_ROUND_FLDN_TO_SINGLE_PRECISION_PAIR, fprRegisterD, false); return true; } -bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frB, frC; - frC = (opcode>>6)&0x1F; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = (hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 - hCPU->fpr[frB].fp1); - - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = 
PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} - -bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MSUB and PS_NMSUB +bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; @@ -1565,35 +1315,64 @@ bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, 
PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - IMLReg fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + if (frD != frA && frD != frB) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterB); + if (frD == frC) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1); + } + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); + 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); } else { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); + if( frD == frA && frD != frB ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + } } // negate result - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterD); + if (withNegative) + { + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, 
fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); + } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1604,18 +1383,27 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //float s1 = (float)hCPU->fpr[frC].fp1; - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUM0, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprBps1, frB); + DefinePS1(fprCps1, frC); + + if( frD == frA ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps1); + } + if (fprDps1 != fprCps1) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprCps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + 
PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1626,18 +1414,26 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)hCPU->fpr[frC].fp0; - //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - // load registers - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUM1, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + + if (frB != frD) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); + } + else + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprAps0); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprCps0); + // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1646,12 +1442,20 @@ bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 
frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = -hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp1 = -hCPU->fpr[frB].fp1; - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterB); + + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + if (frB != frD) + { + // copy + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); + } + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); return true; } @@ -1660,10 +1464,17 @@ bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ABS_PAIR, fprRegisterD, fprRegisterB); + + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); + + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprDps1); return true; } 
@@ -1675,11 +1486,16 @@ bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = (float)(1.0f / (float)hCPU->fpr[frB].fp0); //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)hCPU->fpr[frB].fp1); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FRES_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); + // adjust accuracy + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1688,13 +1504,17 @@ bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = (float)(1.0f / (float)sqrt(hCPU->fpr[frB].fp0)); - //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)sqrt(hCPU->fpr[frB].fp1)); - // load registers - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FRSQRTE_PAIR, fprRegisterD, fprRegisterB); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, 
fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); + // adjust accuracy + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1703,14 +1523,15 @@ bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp1 = hCPU->fpr[frB].fp1; - // load registers if( frB != frD ) { - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterB); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); } return true; } @@ -1723,11 +1544,17 @@ bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SELECT_PAIR, fprRegisterD, 
fprRegisterA, fprRegisterB, fprRegisterC); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprDps0, fprAps0, fprBps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprDps1, fprAps1, fprBps1, fprCps1); return true; } @@ -1737,26 +1564,13 @@ bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)hCPU->fpr[frA].fp0; - //float s1 = (float)hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - if( frA == frB ) - { - // simply duplicate bottom into bottom and top of destination register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - } - else - { - // copy bottom of frB to top first - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, fprRegisterD, fprRegisterB); - // copy bottom of frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); - } + DefinePS0(frpAps0, frA); + DefinePS0(frpBps0, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps0); + if (frpDps0 != frpAps0) + 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps0); return true; } @@ -1766,17 +1580,14 @@ bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0; - // hCPU->fpr[frD].fp1 = hCPU->fpr[frB].fp1; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterB); - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); + DefinePS0(frpAps0, frA); + DefinePS1(frpBps1, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + if (frpDps0 != frpAps0) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps1); return true; } @@ -1787,33 +1598,22 @@ bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( frA == frB ) + DefinePS1(frpAps1, frA); + DefinePS0(frpBps0, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + + if (frD != frB) { - // swap bottom 
and top - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterA); - } - else if( frA == frD ) - { - // copy frB bottom to frD bottom - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // swap lower and upper half of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); - } - else if( frB == frD ) - { - // copy upper half of frA to upper half of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterA); - // swap lower and upper half of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps0); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); + DefineTempFPR(frpTemp, 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpTemp, frpBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpTemp); } return true; } @@ -1825,73 +1625,64 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = 
(opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( fprRegisterA == fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - } - else if( fprRegisterD != fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterB); - } - else if( fprRegisterD == fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, fprRegisterD, fprRegisterA); - } - else - { - debugBreakpoint(); - return false; - } + DefinePS1(frpAps1, frA); + DefinePS1(frpBps1, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps1); return true; } bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("PS_CMPO0: Not implemented\n"); + // Not implemented return false; - - sint32 crfD, frA, frB; - uint32 c=0; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - crfD = (opcode>>23)&0x7; - - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, 
PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); - return true; } bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("PS_CMPU0: Not implemented\n"); - return false; - sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); + + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo: set fpscr return true; } bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("PS_CMPU1: Not implemented\n"); - return false; - sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - IMLReg fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, 
PPCREC_NAME_FPR0 + frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_TOP, fprRegisterA, fprRegisterB, crfD); + + DefinePS1(fprA, frA); + DefinePS1(fprB, frB); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); return true; -} \ No newline at end of file +} diff --git a/src/asm/CMakeLists.txt b/src/asm/CMakeLists.txt deleted file mode 100644 index 19a7ddd8..00000000 --- a/src/asm/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -project(CemuAsm C) - -if (CMAKE_OSX_ARCHITECTURES) - set(CEMU_ASM_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES}) -else() - set(CEMU_ASM_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR}) -endif() - -if (CEMU_ASM_ARCHITECTURE MATCHES "(x86)|(X86)|(amd64)|(AMD64)") - - if (WIN32) - - enable_language(C ASM_MASM) - - add_library(CemuAsm x64util_masm.asm) - set_source_files_properties(x64util_masm.asm PROPERTIES LANGUAGE ASM_MASM) - - # workaround for cr flag being passed to LINK.exe which considers it an input file and thus fails - # doesn't always seem to happen. 
The Windows CI builds were fine, but locally I would run into this problem - # possibly related to https://gitlab.kitware.com/cmake/cmake/-/issues/18889 - set(CMAKE_ASM_MASM_CREATE_STATIC_LIBRARY " /OUT: ") - - set_property(TARGET CemuAsm PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") - - else() - - # NASM - if (APPLE) - set(CMAKE_ASM_NASM_COMPILE_OBJECT " -g -Fdwarf -f macho64 --prefix _ -o ") - else() - set(CMAKE_ASM_NASM_COMPILE_OBJECT " -g -Fdwarf -f elf64 -o ") - endif() - set(CMAKE_ASM_NASM_LINK_EXECUTABLE "ld -fPIC -o ") - - enable_language(C ASM_NASM) - - add_library(CemuAsm x64util_nasm.asm) - set_source_files_properties(x64util_nasm.asm PROPERTIES LANGUAGE ASM_NASM) - - if (APPLE) - set_target_properties(CemuAsm PROPERTIES NASM_OBJ_FORMAT macho64) - else() - set_target_properties(CemuAsm PROPERTIES NASM_OBJ_FORMAT elf64) - endif() - set_target_properties(CemuAsm PROPERTIES LINKER_LANGUAGE C) - - endif() - -elseif(CEMU_ASM_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)") - add_library(CemuAsm stub.cpp) -else() - message(STATUS "CemuAsm - Unsupported arch: ${CEMU_ASM_ARCHITECTURE}") -endif() diff --git a/src/asm/stub.cpp b/src/asm/stub.cpp deleted file mode 100644 index 8d1c8b69..00000000 --- a/src/asm/stub.cpp +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/asm/x64util.h b/src/asm/x64util.h deleted file mode 100644 index 885c2f63..00000000 --- a/src/asm/x64util.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#if defined(ARCH_X86_64) - -extern "C" void recompiler_fres(); -extern "C" void recompiler_frsqrte(); - -#else - -// stubbed on non-x86 for now -static void recompiler_fres() -{ - cemu_assert_unimplemented(); -} -static void recompiler_frsqrte() -{ - cemu_assert_unimplemented(); -} - -#endif diff --git a/src/asm/x64util_masm.asm b/src/asm/x64util_masm.asm deleted file mode 100644 index 2587c786..00000000 --- a/src/asm/x64util_masm.asm +++ /dev/null @@ -1,233 +0,0 @@ -.code - -recompiler_fres PROC - ; store all modified registers 
-push rdx -push rcx -push rax -push r8 -lea r8,[asmFresLookupTable] -movq rdx, xmm15 -mov rcx,rdx -shr rcx,2Fh -mov rax,rdx -and ecx,1Fh -shr rax,25h -and eax,3FFh -imul eax,dword ptr [r8+rcx*8+4] -mov r8d,dword ptr [r8+rcx*8] -mov rcx,rdx -shr rcx,34h -inc eax -shr eax,1 -sub r8d,eax -and ecx,7FFh -jne fres_espresso_label3 -mov rax,7FF0000000000000h -or rdx,rax -movq xmm15, rdx -pop r8 -pop rax -pop rcx -pop rdx -ret -fres_espresso_label3: -cmp ecx,7FFh -jne fres_espresso_label4 -mov rax,0FFFFFFFFFFFFFh -test rax,rdx -jne fres_espresso_label1 -test rdx,rdx -jns fres_espresso_label2 -mov rax,8000000000000000h -movq xmm15, rax -pop r8 -pop rax -pop rcx -pop rdx -ret -fres_espresso_label2: -xorps xmm15,xmm15 -pop r8 -pop rax -pop rcx -pop rdx -ret -fres_espresso_label4: -mov eax,7FDh -sub eax,ecx -mov ecx,eax -mov rax,8000000000000000h -and rdx,rax -shl rcx,34h -mov eax,r8d -or rcx,rdx -shl rax,1Dh -add rcx,rax -movq xmm15, rcx -fres_espresso_label1: -pop r8 -pop rax -pop rcx -pop rdx -ret - -recompiler_fres ENDP - -asmFresLookupTable: -DD 07ff800h, 03e1h -DD 0783800h, 03a7h -DD 070ea00h, 0371h -DD 06a0800h, 0340h -DD 0638800h, 0313h -DD 05d6200h, 02eah -DD 0579000h, 02c4h -DD 0520800h, 02a0h -DD 04cc800h, 027fh -DD 047ca00h, 0261h -DD 0430800h, 0245h -DD 03e8000h, 022ah -DD 03a2c00h, 0212h -DD 0360800h, 01fbh -DD 0321400h, 01e5h -DD 02e4a00h, 01d1h -DD 02aa800h, 01beh -DD 0272c00h, 01ach -DD 023d600h, 019bh -DD 0209e00h, 018bh -DD 01d8800h, 017ch -DD 01a9000h, 016eh -DD 017ae00h, 015bh -DD 014f800h, 015bh -DD 0124400h, 0143h -DD 0fbe00h, 0143h -DD 0d3800h, 012dh -DD 0ade00h, 012dh -DD 088400h, 011ah -DD 065000h, 011ah -DD 041c00h, 0108h -DD 020c00h, 0106h - -recompiler_frsqrte PROC - ; store all modified registers -push rdx -push rcx -push rax -push r8 -push r9 -movq r8, xmm15 -mov rax,7FFFFFFFFFFFFFFFh -test rax,r8 -jne frsqrte_espresso_label1 -mov rax,0FFF0000000000000h -and r8,rax -mov rax,7FF0000000000000h -or r8,rax -movq xmm15, r8 -pop r9 -pop r8 -pop rax -pop 
rcx -pop rdx -ret -frsqrte_espresso_label1: -mov r9,r8 -shr r9,34h -and r9d,7FFh -cmp r9d,7FFh -jne frsqrte_espresso_label2 -mov rax,0FFFFFFFFFFFFFh -test rax,r8 -jne frsqrte_espresso_label3 -test r8,r8 -js frsqrte_espresso_label4 -xorps xmm15,xmm15 -pop r9 -pop r8 -pop rax -pop rcx -pop rdx -ret -frsqrte_espresso_label2: -test r8,r8 -jns frsqrte_espresso_label5 -frsqrte_espresso_label4: -mov rax,7FF8000000000000h -movq xmm15, rax -pop r9 -pop r8 -pop rax -pop rcx -pop rdx -ret -frsqrte_espresso_label5: -lea rdx,[asmFrsqrteLookupTable] -mov rax,r8 -shr rax,30h -mov rcx,r8 -shr rcx,25h -and eax,1Fh -and ecx,7FFh -imul ecx,dword ptr [rdx+rax*8+4] -mov eax,dword ptr [rdx+rax*8] -sub eax,ecx -lea ecx,[r9-3FDh] -shr ecx,1 -movsxd rdx,eax -mov eax,3FFh -sub eax,ecx -shl rdx,1Ah -mov ecx,eax -mov rax,8000000000000000h -and r8,rax -shl rcx,34h -or rcx,r8 -add rdx,rcx -movq xmm15, rdx -frsqrte_espresso_label3: -pop r9 -pop r8 -pop rax -pop rcx -pop rdx -ret - -recompiler_frsqrte ENDP - -asmFrsqrteLookupTable: -DD 01a7e800h, 0568h -DD 017cb800h, 04f3h -DD 01552800h, 048dh -DD 0130c000h, 0435h -DD 010f2000h, 03e7h -DD 0eff000h, 03a2h -DD 0d2e000h, 0365h -DD 0b7c000h, 032eh -DD 09e5000h, 02fch -DD 0867000h, 02d0h -DD 06ff000h, 02a8h -DD 05ab800h, 0283h -DD 046a000h, 0261h -DD 0339800h, 0243h -DD 0218800h, 0226h -DD 0105800h, 020bh -DD 03ffa000h, 07a4h -DD 03c29000h, 0700h -DD 038aa000h, 0670h -DD 03572000h, 05f2h -DD 03279000h, 0584h -DD 02fb7000h, 0524h -DD 02d26000h, 04cch -DD 02ac0000h, 047eh -DD 02881000h, 043ah -DD 02665000h, 03fah -DD 02468000h, 03c2h -DD 02287000h, 038eh -DD 020c1000h, 035eh -DD 01f12000h, 0332h -DD 01d79000h, 030ah -DD 01bf4000h, 02e6h - - - -END \ No newline at end of file diff --git a/src/asm/x64util_nasm.asm b/src/asm/x64util_nasm.asm deleted file mode 100644 index 89878f6e..00000000 --- a/src/asm/x64util_nasm.asm +++ /dev/null @@ -1,237 +0,0 @@ -DEFAULT REL - -SECTION .text - -global udiv128 -global recompiler_fres -global recompiler_frsqrte - 
-udiv128: - mov rax, rcx - div r8 - mov [r9], rdx - ret - -recompiler_fres: - ; store all modified registers -push rdx -push rcx -push rax -push r8 -lea r8,[asmFresLookupTable] -movq rdx, xmm15 -mov rcx,rdx -shr rcx,2Fh -mov rax,rdx -and ecx,1Fh -shr rax,25h -and eax,3FFh -imul eax,dword [r8+rcx*8+4] -mov r8d,dword [r8+rcx*8] -mov rcx,rdx -shr rcx,34h -inc eax -shr eax,1 -sub r8d,eax -and ecx,7FFh -jne fres_espresso_label3 -mov rax,7FF0000000000000h -or rdx,rax -movq xmm15, rdx -pop r8 -pop rax -pop rcx -pop rdx -ret -fres_espresso_label3: -cmp ecx,7FFh -jne fres_espresso_label4 -mov rax,0FFFFFFFFFFFFFh -test rax,rdx -jne fres_espresso_label1 -test rdx,rdx -jns fres_espresso_label2 -mov rax,8000000000000000h -movq xmm15, rax -pop r8 -pop rax -pop rcx -pop rdx -ret -fres_espresso_label2: -xorps xmm15,xmm15 -pop r8 -pop rax -pop rcx -pop rdx -ret -fres_espresso_label4: -mov eax,7FDh -sub eax,ecx -mov ecx,eax -mov rax,8000000000000000h -and rdx,rax -shl rcx,34h -mov eax,r8d -or rcx,rdx -shl rax,1Dh -add rcx,rax -movq xmm15, rcx -fres_espresso_label1: -pop r8 -pop rax -pop rcx -pop rdx -ret - -asmFresLookupTable: -DD 07ff800h, 03e1h -DD 0783800h, 03a7h -DD 070ea00h, 0371h -DD 06a0800h, 0340h -DD 0638800h, 0313h -DD 05d6200h, 02eah -DD 0579000h, 02c4h -DD 0520800h, 02a0h -DD 04cc800h, 027fh -DD 047ca00h, 0261h -DD 0430800h, 0245h -DD 03e8000h, 022ah -DD 03a2c00h, 0212h -DD 0360800h, 01fbh -DD 0321400h, 01e5h -DD 02e4a00h, 01d1h -DD 02aa800h, 01beh -DD 0272c00h, 01ach -DD 023d600h, 019bh -DD 0209e00h, 018bh -DD 01d8800h, 017ch -DD 01a9000h, 016eh -DD 017ae00h, 015bh -DD 014f800h, 015bh -DD 0124400h, 0143h -DD 0fbe00h, 0143h -DD 0d3800h, 012dh -DD 0ade00h, 012dh -DD 088400h, 011ah -DD 065000h, 011ah -DD 041c00h, 0108h -DD 020c00h, 0106h - -recompiler_frsqrte: - ; store all modified registers -push rdx -push rcx -push rax -push r8 -push r9 -movq r8, xmm15 -mov rax,7FFFFFFFFFFFFFFFh -test rax,r8 -jne frsqrte_espresso_label1 -mov rax,0FFF0000000000000h -and r8,rax -mov 
rax,7FF0000000000000h -or r8,rax -movq xmm15, r8 -pop r9 -pop r8 -pop rax -pop rcx -pop rdx -ret -frsqrte_espresso_label1: -mov r9,r8 -shr r9,34h -and r9d,7FFh -cmp r9d,7FFh -jne frsqrte_espresso_label2 -mov rax,0FFFFFFFFFFFFFh -test rax,r8 -jne frsqrte_espresso_label3 -test r8,r8 -js frsqrte_espresso_label4 -xorps xmm15,xmm15 -pop r9 -pop r8 -pop rax -pop rcx -pop rdx -ret -frsqrte_espresso_label2: -test r8,r8 -jns frsqrte_espresso_label5 -frsqrte_espresso_label4: -mov rax,7FF8000000000000h -movq xmm15, rax -pop r9 -pop r8 -pop rax -pop rcx -pop rdx -ret -frsqrte_espresso_label5: -lea rdx,[asmFrsqrteLookupTable] -mov rax,r8 -shr rax,30h -mov rcx,r8 -shr rcx,25h -and eax,1Fh -and ecx,7FFh -imul ecx,dword [rdx+rax*8+4] -mov eax,dword [rdx+rax*8] -sub eax,ecx -lea ecx,[r9-3FDh] -shr ecx,1 -movsxd rdx,eax -mov eax,3FFh -sub eax,ecx -shl rdx,1Ah -mov ecx,eax -mov rax,8000000000000000h -and r8,rax -shl rcx,34h -or rcx,r8 -add rdx,rcx -movq xmm15, rdx -frsqrte_espresso_label3: -pop r9 -pop r8 -pop rax -pop rcx -pop rdx -ret - -asmFrsqrteLookupTable: -DD 01a7e800h, 0568h -DD 017cb800h, 04f3h -DD 01552800h, 048dh -DD 0130c000h, 0435h -DD 010f2000h, 03e7h -DD 0eff000h, 03a2h -DD 0d2e000h, 0365h -DD 0b7c000h, 032eh -DD 09e5000h, 02fch -DD 0867000h, 02d0h -DD 06ff000h, 02a8h -DD 05ab800h, 0283h -DD 046a000h, 0261h -DD 0339800h, 0243h -DD 0218800h, 0226h -DD 0105800h, 020bh -DD 03ffa000h, 07a4h -DD 03c29000h, 0700h -DD 038aa000h, 0670h -DD 03572000h, 05f2h -DD 03279000h, 0584h -DD 02fb7000h, 0524h -DD 02d26000h, 04cch -DD 02ac0000h, 047eh -DD 02881000h, 043ah -DD 02665000h, 03fah -DD 02468000h, 03c2h -DD 02287000h, 038eh -DD 020c1000h, 035eh -DD 01f12000h, 0332h -DD 01d79000h, 030ah -DD 01bf4000h, 02e6h