mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-02 13:01:18 +12:00
PPCRec: Rework floating point instructions (#1554)
This commit is contained in:
parent
33d5c6d490
commit
de542410c2
22 changed files with 1428 additions and 2879 deletions
|
@ -49,7 +49,6 @@ add_subdirectory(audio)
|
|||
add_subdirectory(util)
|
||||
add_subdirectory(imgui)
|
||||
add_subdirectory(resource)
|
||||
add_subdirectory(asm)
|
||||
|
||||
add_executable(CemuBin
|
||||
main.cpp
|
||||
|
|
|
@ -548,7 +548,6 @@ else()
|
|||
endif()
|
||||
|
||||
target_link_libraries(CemuCafe PRIVATE
|
||||
CemuAsm
|
||||
CemuAudio
|
||||
CemuCommon
|
||||
CemuComponents
|
||||
|
|
|
@ -10,6 +10,18 @@ namespace Espresso
|
|||
CR_BIT_INDEX_SO = 3,
|
||||
};
|
||||
|
||||
// Data type selector for paired-single quantized loads.
// The enumerators carry explicit numeric values (0..7, i.e. they fit in three bits);
// values 1..3 have no assigned data type and are kept only as placeholders.
enum class PSQ_LOAD_TYPE
{
	TYPE_F32     = 0, // 32-bit float
	TYPE_UNUSED1 = 1, // placeholder, no data type assigned
	TYPE_UNUSED2 = 2, // placeholder, no data type assigned
	TYPE_UNUSED3 = 3, // placeholder, no data type assigned
	TYPE_U8      = 4, // unsigned 8-bit integer
	TYPE_U16     = 5, // unsigned 16-bit integer
	TYPE_S8      = 6, // signed 8-bit integer
	TYPE_S16     = 7, // signed 16-bit integer
};
|
||||
|
||||
enum class PrimaryOpcode
|
||||
{
|
||||
// underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#include "Cafe/HW/Espresso/Const.h"
|
||||
#include "asm/x64util.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||
|
|
|
@ -609,7 +609,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
|
|||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -635,7 +635,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
|
|||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -894,7 +894,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
|
|||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -1204,9 +1204,11 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction,
|
|||
else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64)
|
||||
{
|
||||
auto regR = _regF64(imlInstruction->op_r_name.regR);
|
||||
if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
|
||||
if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
|
||||
{
|
||||
x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
|
||||
sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
|
||||
sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
|
||||
x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double));
|
||||
}
|
||||
else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
|
||||
{
|
||||
|
@ -1281,9 +1283,11 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction,
|
|||
{
|
||||
auto regR = _regF64(imlInstruction->op_r_name.regR);
|
||||
uint32 name = imlInstruction->op_r_name.name;
|
||||
if (name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0 + 32))
|
||||
if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64))
|
||||
{
|
||||
x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * (name - PPCREC_NAME_FPR0));
|
||||
sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2;
|
||||
sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2;
|
||||
x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0));
|
||||
}
|
||||
else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8))
|
||||
{
|
||||
|
|
|
@ -205,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
|
|||
void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32);
|
||||
void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
|
@ -230,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist
|
|||
void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32);
|
||||
void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc);
|
||||
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc);
|
||||
|
|
|
@ -3,8 +3,6 @@
|
|||
#include "BackendX64.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
#include "asm/x64util.h" // for recompiler_fres / frsqrte
|
||||
|
||||
uint32 _regF64(IMLReg physReg);
|
||||
|
||||
uint32 _regI32(IMLReg r)
|
||||
|
@ -34,231 +32,6 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
|
|||
return (x86Assembler64::GPR8_REX)regId;
|
||||
}
|
||||
|
||||
// Emits x64 code that multiplies registerXMM (packed doubles) by a scale factor selected at runtime from a GQR value.
//   registerXMM - XMM register to scale in place
//   isLoad      - true: use the load scale field and the psq_ld tables; false: store scale field and psq_st tables
//   scalePS1    - selects the *_ps0_ps1 table instead of the *_ps0_1 table
//                 (presumably the latter leaves PS1 multiplied by 1.0 - confirm against the table initialisation)
//   registerGQR - IML register holding the GQR value
// REG_RESV_TEMP is overwritten by the generated code.
void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR)
{
	// bit position of the 6-bit scale field inside GQR; the load field sits 16 bits above the store field
	const sint32 scaleFieldShift = (isLoad ? 16 : 0) + 8;

	// fetch GQR into the scratch register
	x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
	// shifting by 4 bits less than the field position and masking with (0x3F << 4) yields scale * 16,
	// which is the byte offset of a 128-bit entry inside the scale table
	x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, scaleFieldShift - 4);
	x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (0x3F << 4));
	// turn the offset into an address relative to the recompiler instance data
	x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_RECDATA);

	// multiply by the table entry; one of four tables is used depending on direction and PS1 handling
	if (isLoad && scalePS1)
		x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_ps1));
	else if (isLoad)
		x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_1));
	else if (scalePS1)
		x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_ps1));
	else
		x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_1));
}
|
||||
|
||||
// generate code for PSQ load for a particular type
|
||||
// if scaleGQR is -1 then a scale of 1.0 is assumed (no scale)
|
||||
void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID)
|
||||
{
|
||||
if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1)
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
// optimized code for ps float load
|
||||
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
|
||||
x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
|
||||
x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD
|
||||
x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP);
|
||||
x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, registerXMM, registerXMM);
|
||||
// note: floats are not scaled
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0)
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg);
|
||||
if (g_CPUFeatures.x86.movbe)
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (g_CPUFeatures.x86.movbe)
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP);
|
||||
x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
}
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP);
|
||||
// load constant 1.0 into lower half and upper half of temp register
|
||||
x64Gen_movddup_xmmReg_memReg64(x64GenContext, registerXMM, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
|
||||
// overwrite lower half with single from memory
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerXMM, REG_RESV_FPR_TEMP);
|
||||
// note: floats are not scaled
|
||||
}
|
||||
else
|
||||
{
|
||||
sint32 readSize;
|
||||
bool isSigned = false;
|
||||
if (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1)
|
||||
{
|
||||
readSize = 16;
|
||||
isSigned = true;
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1)
|
||||
{
|
||||
readSize = 16;
|
||||
isSigned = false;
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1)
|
||||
{
|
||||
readSize = 8;
|
||||
isSigned = true;
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1)
|
||||
{
|
||||
readSize = 8;
|
||||
isSigned = false;
|
||||
}
|
||||
else
|
||||
assert_dbg();
|
||||
|
||||
bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1);
|
||||
for (sint32 wordIndex = 0; wordIndex < 2; wordIndex++)
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
// read from memory
|
||||
if (wordIndex == 1 && loadPS1 == false)
|
||||
{
|
||||
// store constant 1
|
||||
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * 1, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32 memOffset = memImmS32 + wordIndex * (readSize / 8);
|
||||
if (readSize == 16)
|
||||
{
|
||||
// half word
|
||||
x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset);
|
||||
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // endian swap
|
||||
if (isSigned)
|
||||
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
else
|
||||
x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
}
|
||||
else if (readSize == 8)
|
||||
{
|
||||
// byte
|
||||
x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset);
|
||||
if (isSigned)
|
||||
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
else
|
||||
x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
}
|
||||
// store
|
||||
x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * wordIndex, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
// convert the two integers to doubles
|
||||
x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext, registerXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR));
|
||||
// scale
|
||||
if (registerGQR.IsValid())
|
||||
PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, true, loadPS1, registerGQR);
|
||||
}
|
||||
}
|
||||
|
||||
// Generates x64 code for a PSQ load whose data type is only known at runtime.
// The emitted code extracts the 3-bit load type from bits 16..18 of the GQR value, branches on it and
// runs one of five type-specific load sequences (float, u16, s16, u8, s8). Type values other than 4..7
// take the float path.
//   mode - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 loads both elements, any other value loads PS0 only
//   remaining parameters are forwarded unchanged to PPCRecompilerX64Gen_imlInstr_psq_load
void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR)
{
	const bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1);

	// emits "compare type with typeId, jump if equal" with a placeholder target; returns the jump position for later patching
	auto emitJumpIfType = [&](sint32 typeId) -> sint32
	{
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, typeId);
		sint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
		return jumpPos;
	};
	// emits an unconditional jump with a placeholder target; returns the jump position for later patching
	auto emitJumpToEnd = [&]() -> uint32
	{
		uint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmp_imm32(x64GenContext, 0);
		return jumpPos;
	};
	// emits the load sequence for one concrete type, choosing the PS0+PS1 or the PS0-only variant
	auto emitTypedLoad = [&](uint8 modePS0PS1, uint8 modePS0)
	{
		PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? modePS0PS1 : modePS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
	};

	// REG_RESV_TEMP = (GQR >> 16) & 7 -> load type
	x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
	x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16);
	x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);

	// dispatch on the type
	sint32 jumpToCaseU8 = emitJumpIfType(4); // type 4 -> u8
	sint32 jumpToCaseU16 = emitJumpIfType(5); // type 5 -> u16
	sint32 jumpToCaseS8 = emitJumpIfType(6); // type 6 -> s8
	sint32 jumpToCaseS16 = emitJumpIfType(7); // type 7 -> s16

	// fallthrough / default case -> float
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0);
	uint32 exitJumpAfterFloat = emitJumpToEnd();

	// u16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseU16, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_U16_PS0);
	uint32 exitJumpAfterU16 = emitJumpToEnd();

	// s16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseS16, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_S16_PS0);
	uint32 exitJumpAfterS16 = emitJumpToEnd();

	// u8
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseU8, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_U8_PS0);
	uint32 exitJumpAfterU8 = emitJumpToEnd();

	// s8 is the last case and simply runs into the end
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseS8, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_S8_PS0);

	// all exit jumps continue behind the last case
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterFloat, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterU16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterS16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterU8, x64GenContext->emitter->GetWriteIndex());
}
|
||||
|
||||
// load from memory
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
|
||||
{
|
||||
|
@ -269,7 +42,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
|
||||
uint8 mode = imlInstruction->op_storeLoad.mode;
|
||||
|
||||
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 )
|
||||
if( mode == PPCREC_FPR_LD_MODE_SINGLE )
|
||||
{
|
||||
// load byte swapped single into temporary FPR
|
||||
if( indexed )
|
||||
|
@ -299,10 +72,9 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
else
|
||||
{
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM);
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( mode == PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0 )
|
||||
else if( mode == PPCREC_FPR_LD_MODE_DOUBLE )
|
||||
{
|
||||
if( g_CPUFeatures.x86.avx )
|
||||
{
|
||||
|
@ -361,25 +133,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 )
|
||||
{
|
||||
PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed);
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR);
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
|
@ -387,188 +140,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
return true;
|
||||
}
|
||||
|
||||
// Generates x64 code for a PSQ (paired-single quantized) store of one specific, statically known data type.
//   mode        - one of the PPCREC_FPR_ST_MODE_PSQ_* type modes (float / s8 / u8 / s16 / u16, PS0 only or PS0+PS1)
//   registerXMM - source XMM register holding two doubles (PS0 in the low half, PS1 in the high half)
//   memReg      - register holding the guest address
//   memRegEx    - second address register, only used when 'indexed' is set
//   memImmS32   - constant address displacement
//   indexed     - address is memReg + memRegEx; only implemented for the FLOAT_PS0 mode
//   registerGQR - if valid, the value is copied to REG_RESV_FPR_TEMP first and, for integer types,
//                 multiplied by the GQR store scale. If invalid (the default), no scaling is applied.
// Integer types are converted with truncation, clamped to the range of the target type and written big-endian.
// The generated code overwrites REG_RESV_TEMP and REG_RESV_FPR_TEMP.
void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID)
{
	bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1);
	bool isFloat = mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1;
	if (registerGQR.IsValid())
	{
		// work on a copy in the temporary xmm register so the source register stays unmodified
		x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
		registerXMM = REG_RESV_FPR_TEMP;
		// apply scale (floats are stored unscaled)
		if (isFloat == false)
			PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, false, storePS1, registerGQR);
	}
	if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0)
	{
		// narrow PS0 to single precision and move the raw bits into REG_RESV_TEMP
		x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
		x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
		// without MOVBE the endian swap has to be done explicitly before the store
		if (g_CPUFeatures.x86.movbe == false)
			x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
		if (indexed)
		{
			// temporarily turn memReg into memReg + memRegEx; undone after the store
			cemu_assert_debug(memReg != memRegEx);
			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
		}
		if (g_CPUFeatures.x86.movbe)
			x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP);
		else
			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP);
		if (indexed)
		{
			// restore memReg
			x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
		}
		return;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1)
	{
		if (indexed)
			assert_dbg(); // todo
		// narrow both doubles to singles and write them with one 64-bit store
		x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
		x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
		x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD
		x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
		x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
		return;
	}
	// store as integer
	// get clamp limits and element width from mode
	// (initialized so that an unsupported mode can never lead to a read of an indeterminate value)
	sint32 clampMin = 0, clampMax = 0;
	sint32 bitWriteSize = 0;
	if (mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1)
	{
		clampMin = -128;
		clampMax = 127;
		bitWriteSize = 8;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1)
	{
		clampMin = 0;
		clampMax = 255;
		bitWriteSize = 8;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1)
	{
		clampMin = 0;
		clampMax = 0xFFFF;
		bitWriteSize = 16;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1)
	{
		clampMin = -32768;
		clampMax = 32767;
		bitWriteSize = 16;
	}
	else
	{
		cemu_assert(false); // unsupported mode
	}
	if (indexed)
		assert_dbg(); // indexed addressing is not implemented for integer types
	for (sint32 valueIndex = 0; valueIndex < (storePS1 ? 2 : 1); valueIndex++)
	{
		// note: the GQR scale (if any) has already been applied to registerXMM above
		if (valueIndex == 0)
		{
			// convert low half (PS0) to integer
			x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, registerXMM);
		}
		else
		{
			// load top half (PS1) into bottom half of temporary register
			x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
			// convert low half to integer
			x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
		}
		// i = max(i, clampMin): skip the overwrite if the value is already >= clampMin
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin);
		sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0);
		x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin);
		PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
		// i = min(i, clampMax): skip the overwrite if the value is already <= clampMax
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax);
		sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0);
		x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax);
		PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
		// endian swap (single bytes need none)
		if (bitWriteSize == 16)
			x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
		// write to memory, PS1 directly follows PS0
		sint32 memOffset = memImmS32 + valueIndex * (bitWriteSize / 8);
		if (bitWriteSize == 8)
			x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP);
		else if (bitWriteSize == 16)
			x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP);
	}
}
|
||||
|
||||
// Generates x64 code for a PSQ store whose data type is only known at runtime.
// The emitted code extracts the 3-bit store type from bits 0..2 of the GQR value, branches on it and
// runs one of five type-specific store sequences (float, u16, s16, u8, s8). Type values other than 4..7
// take the float path.
//   mode - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 stores both elements, any other value stores PS0 only
//   remaining parameters are forwarded unchanged to PPCRecompilerX64Gen_imlInstr_psq_store
void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR)
{
	const bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1);

	// emits "compare type with typeId, jump if equal" with a placeholder target; returns the jump position for later patching
	auto emitJumpIfType = [&](sint32 typeId) -> sint32
	{
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, typeId);
		sint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
		return jumpPos;
	};
	// emits an unconditional jump with a placeholder target; returns the jump position for later patching
	auto emitJumpToEnd = [&]() -> uint32
	{
		uint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmp_imm32(x64GenContext, 0);
		return jumpPos;
	};
	// emits the store sequence for one concrete type, choosing the PS0+PS1 or the PS0-only variant
	auto emitTypedStore = [&](uint8 modePS0PS1, uint8 modePS0)
	{
		PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? modePS0PS1 : modePS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
	};

	// REG_RESV_TEMP = GQR & 7 -> store type
	x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
	x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);

	// dispatch on the type
	sint32 jumpToCaseU8 = emitJumpIfType(4); // type 4 -> u8
	sint32 jumpToCaseU16 = emitJumpIfType(5); // type 5 -> u16
	sint32 jumpToCaseS8 = emitJumpIfType(6); // type 6 -> s8
	sint32 jumpToCaseS16 = emitJumpIfType(7); // type 7 -> s16

	// fallthrough / default case -> float
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0);
	uint32 exitJumpAfterFloat = emitJumpToEnd();

	// u16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseU16, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_U16_PS0);
	uint32 exitJumpAfterU16 = emitJumpToEnd();

	// s16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseS16, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_S16_PS0);
	uint32 exitJumpAfterS16 = emitJumpToEnd();

	// u8
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseU8, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_U8_PS0);
	uint32 exitJumpAfterU8 = emitJumpToEnd();

	// s8 is the last case and simply runs into the end
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpToCaseS8, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_S8_PS0);

	// all exit jumps continue behind the last case
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterFloat, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterU16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterS16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitJumpAfterU8, x64GenContext->emitter->GetWriteIndex());
}
|
||||
|
||||
// store to memory
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
|
||||
{
|
||||
|
@ -578,7 +149,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
|||
if( indexed )
|
||||
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
|
||||
uint8 mode = imlInstruction->op_storeLoad.mode;
|
||||
if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 )
|
||||
if( mode == PPCREC_FPR_ST_MODE_SINGLE )
|
||||
{
|
||||
if (imlInstruction->op_storeLoad.flags2.notExpanded)
|
||||
{
|
||||
|
@ -607,7 +178,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
|||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
}
|
||||
else if( mode == PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0 )
|
||||
else if( mode == PPCREC_FPR_ST_MODE_DOUBLE )
|
||||
{
|
||||
if( indexed )
|
||||
{
|
||||
|
@ -645,192 +216,61 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
|||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
else if(mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 )
|
||||
{
|
||||
cemu_assert_debug(imlInstruction->op_storeLoad.flags2.notExpanded == false);
|
||||
PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed);
|
||||
}
|
||||
else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( indexed )
|
||||
assert_dbg(); // todo
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Swaps the two 64-bit lanes (PS0 and PS1) of an XMM register in place by
// shuffling the register with itself.
//   x64GenContext - code emitter context the instruction is written to
//   xmmReg        - index of the XMM register whose halves are exchanged
void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg)
{
	// SHUFPD imm8 = 1: the low result lane takes the high lane of the first
	// operand, the high result lane takes the low lane of the second operand.
	// With both operands being the same register this is a lane swap.
	const sint32 laneSwapSelector = 1;
	x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, xmmReg, xmmReg, laneSwapSelector);
}
|
||||
|
||||
// FPR op FPR
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
|
||||
{
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT )
|
||||
{
|
||||
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regA);
|
||||
x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr);
|
||||
return;
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT )
|
||||
{
|
||||
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
|
||||
x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
|
||||
return;
|
||||
}
|
||||
|
||||
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
|
||||
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP )
|
||||
{
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP )
|
||||
{
|
||||
// VPUNPCKHQDQ
|
||||
if (regR == regA)
|
||||
{
|
||||
// unpack top to bottom and top
|
||||
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
//else if ( hasAVXSupport )
|
||||
//{
|
||||
// // unpack top to bottom and top with non-destructive destination
|
||||
// // update: On Ivy Bridge this causes weird stalls?
|
||||
// x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, registerResult, registerOperand, registerOperand);
|
||||
//}
|
||||
else
|
||||
{
|
||||
// move top to bottom
|
||||
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
// duplicate bottom
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
}
|
||||
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM )
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN )
|
||||
{
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP )
|
||||
{
|
||||
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED )
|
||||
{
|
||||
if( regR != regA )
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
_swapPS0PS1(x64GenContext, regR);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP )
|
||||
{
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 2);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM )
|
||||
{
|
||||
// use unpckhpd here?
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 3);
|
||||
_swapPS0PS1(x64GenContext, regR);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY )
|
||||
{
|
||||
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR )
|
||||
{
|
||||
x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE )
|
||||
{
|
||||
x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR)
|
||||
{
|
||||
x64Gen_divpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD )
|
||||
{
|
||||
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR )
|
||||
{
|
||||
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR )
|
||||
{
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
|
||||
{
|
||||
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ )
|
||||
{
|
||||
x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA);
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
// move to FPR register
|
||||
x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT)
|
||||
{
|
||||
// move register to XMM15
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
|
||||
// call assembly routine to calculate accurate FRSQRTE result in XMM15
|
||||
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte);
|
||||
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP);
|
||||
|
||||
// copy result to bottom of result register
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR )
|
||||
{
|
||||
// copy register
|
||||
if( regR != regA )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
// toggle sign bits
|
||||
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR )
|
||||
{
|
||||
// copy register
|
||||
if( regR != regA )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
// set sign bit to 0
|
||||
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR)
|
||||
{
|
||||
// calculate bottom half of result
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR)
|
||||
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres);
|
||||
else
|
||||
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte);
|
||||
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
|
||||
// calculate top half of result
|
||||
// todo - this top to bottom copy can be optimized?
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, regA, 3);
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom
|
||||
|
||||
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15
|
||||
|
||||
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); // copy bottom to top
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
|
@ -846,7 +286,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
|
|||
uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA);
|
||||
uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB);
|
||||
|
||||
if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM)
|
||||
if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY)
|
||||
{
|
||||
if (regR == regA)
|
||||
{
|
||||
|
@ -862,7 +302,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
|
|||
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
}
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM)
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD)
|
||||
{
|
||||
// todo: Use AVX 3-operand VADDSD if available
|
||||
if (regR == regA)
|
||||
|
@ -879,30 +319,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
|
|||
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
}
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR)
|
||||
{
|
||||
// registerResult = registerOperandA - registerOperandB
|
||||
if( regR == regA )
|
||||
{
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
}
|
||||
else if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, regR, regA, regB);
|
||||
}
|
||||
else if( regR == regB )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
}
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB )
|
||||
{
|
||||
if( regR == regA )
|
||||
{
|
||||
|
@ -934,39 +351,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
|
|||
uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB);
|
||||
uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC);
|
||||
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 )
|
||||
{
|
||||
// todo: Investigate if there are other optimizations possible if the operand registers overlap
|
||||
// generic case
|
||||
// 1) move frA bottom to frTemp bottom and top
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
// 2) add frB (both halfs, lower half is overwritten in the next step)
|
||||
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
|
||||
// 3) Interleave top of frTemp and frC
|
||||
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC);
|
||||
// todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 )
|
||||
{
|
||||
// todo: Investigate if there are other optimizations possible if the operand registers overlap
|
||||
// 1) move frA bottom to frTemp bottom and top
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
// 2) add frB (both halfs, lower half is overwritten in the next step)
|
||||
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
|
||||
// 3) Copy bottom from frC
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC);
|
||||
//// 4) Swap bottom and top half
|
||||
//x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1);
|
||||
// todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
|
||||
//float s0 = (float)hCPU->fpr[frC].fp0;
|
||||
//float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1);
|
||||
//hCPU->fpr[frD].fp0 = s0;
|
||||
//hCPU->fpr[frD].fp1 = s1;
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM )
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT )
|
||||
{
|
||||
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
|
||||
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
|
||||
|
@ -981,38 +366,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
|
|||
// end
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR )
|
||||
{
|
||||
// select bottom
|
||||
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
|
||||
sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
|
||||
// select C bottom
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
|
||||
sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
|
||||
// select B bottom
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex());
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
// end
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex());
|
||||
// select top
|
||||
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // copy top to bottom (todo: May cause stall?)
|
||||
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
|
||||
sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
|
||||
// select C top
|
||||
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandC);
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regC, 2);
|
||||
sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
|
||||
// select B top
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex());
|
||||
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandB);
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regB, 2);
|
||||
// end
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex());
|
||||
}
|
||||
else
|
||||
assert_dbg();
|
||||
}
|
||||
|
@ -1021,15 +374,19 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
|
|||
{
|
||||
uint32 regR = _regF64(imlInstruction->op_fpr_r.regR);
|
||||
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM )
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE )
|
||||
{
|
||||
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE )
|
||||
{
|
||||
x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS )
|
||||
{
|
||||
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM )
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS )
|
||||
{
|
||||
x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom));
|
||||
}
|
||||
|
@ -1040,19 +397,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
|
|||
// convert back to 64bit double
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR )
|
||||
{
|
||||
// convert to 32bit singles
|
||||
x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
// convert back to 64bit doubles
|
||||
x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64)
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64)
|
||||
{
|
||||
// convert bottom to 64bit double
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
// copy to top half
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -213,6 +213,37 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi
|
|||
}
|
||||
}
|
||||
|
||||
// Emits MOVSD xmm, [base + disp32] (F2 0F 10 /r): loads a 64-bit double from
// memory into the low lane of an XMM register.
//   x64GenContext - code emitter context the bytes are written to
//   xmmRegister   - destination XMM register index
//   memRegister   - base address register; only X86_REG_RSP and register 15 (R15) are handled
//   memImmU32     - 32-bit displacement added to the base register
// For any other base register the function only hits assert_dbg() and writes no bytes.
void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
{
	// SSE2
	const bool baseIsRSP = (memRegister == X86_REG_RSP);
	const bool baseIsR15 = !baseIsRSP && (memRegister == 15);
	if (!baseIsRSP && !baseIsR15)
	{
		assert_dbg();
		return;
	}

	// reg field of the ModRM byte (bits 3..5) selects the destination XMM register
	const sint32 modrmRegBits = (xmmRegister & 7) * 8;

	if (baseIsR15)
	{
		// the R15 form is preceded by a 0x36 byte
		// NOTE(review): 0x36 is the SS segment-override prefix in the x86 opcode map; confirm why it is emitted here
		x64Gen_writeU8(x64GenContext, 0x36);
	}
	x64Gen_writeU8(x64GenContext, 0xF2);
	// the RSP form passes 0 as the base register to the prefix helper
	x64Gen_genSSEVEXPrefix2(x64GenContext, baseIsRSP ? 0 : memRegister, xmmRegister, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x10);
	if (baseIsRSP)
	{
		// MOVSD <xmm>, [RSP+<imm>]
		// ModRM with rm=100 requires a SIB byte; 0x24 encodes base=RSP without index
		x64Gen_writeU8(x64GenContext, 0x84 + modrmRegBits);
		x64Gen_writeU8(x64GenContext, 0x24);
	}
	else
	{
		// MOVSD <xmm>, [R15+<imm>]
		x64Gen_writeU8(x64GenContext, 0x87 + modrmRegBits);
	}
	x64Gen_writeU32(x64GenContext, memImmU32);
}
|
||||
|
||||
void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32)
|
||||
{
|
||||
// SSE3
|
||||
|
@ -561,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis
|
|||
x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7));
|
||||
}
|
||||
|
||||
// Emits CVTSI2SD xmm, reg (F2 0F 2A /r) in register-direct form.
//   x64GenContext   - code emitter context the bytes are written to
//   xmmRegisterDest - destination XMM register index (ModRM reg field)
//   registerSrc     - source register index (ModRM rm field)
// NOTE(review): despite the "_xmmReg_xmmReg" suffix, the source parameter is not
// named as an XMM register and the opcode takes an integer source; callers are
// presumably expected to pass a general-purpose register index - confirm.
void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc)
{
	// SSE2
	// ModRM byte: mod=11 (register direct), reg=destination, rm=source
	const sint32 modrmByte = 0xC0 + (xmmRegisterDest & 7) * 8 + (registerSrc & 7);

	x64Gen_writeU8(x64GenContext, 0xF2);
	x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false);
	x64Gen_writeU8(x64GenContext, 0x0F);
	x64Gen_writeU8(x64GenContext, 0x2A);
	x64Gen_writeU8(x64GenContext, modrmByte);
}
|
||||
|
||||
void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc)
|
||||
{
|
||||
// SSE2
|
||||
|
|
|
@ -189,9 +189,13 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di
|
|||
{
|
||||
strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0);
|
||||
}
|
||||
else if (inst.op_r_name.name >= PPCREC_NAME_FPR0 && inst.op_r_name.name < (PPCREC_NAME_FPR0 + 999))
|
||||
if (inst.op_r_name.name >= PPCREC_NAME_FPR_HALF && inst.op_r_name.name < (PPCREC_NAME_FPR_HALF + 32*2))
|
||||
{
|
||||
strOutput.addFmt("f{}", inst.op_r_name.name - PPCREC_NAME_FPR0);
|
||||
// Names in [PPCREC_NAME_FPR_HALF, PPCREC_NAME_FPR_HALF + 32*2) come in pairs:
// the low bit of the offset selects ps0/ps1 (printed below), so the FPR index
// is the offset from PPCREC_NAME_FPR_HALF divided by two.
strOutput.addFmt("f{}", (inst.op_r_name.name - PPCREC_NAME_FPR_HALF) / 2);
|
||||
if ((inst.op_r_name.name-PPCREC_NAME_FPR_HALF)&1)
|
||||
strOutput.add(".ps1");
|
||||
else
|
||||
strOutput.add(".ps0");
|
||||
}
|
||||
else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999))
|
||||
{
|
||||
|
|
|
@ -226,35 +226,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
// address is in gpr register
|
||||
if (op_storeLoad.registerMem.IsValid())
|
||||
registersUsed->readGPR1 = op_storeLoad.registerMem;
|
||||
// determine partially written result
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR2 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
|
||||
// PS1 remains the same
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
registersUsed->readGPR2 = op_storeLoad.registerData;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
|
||||
{
|
||||
|
@ -265,34 +236,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR1 = op_storeLoad.registerMem;
|
||||
if (op_storeLoad.registerMem2.IsValid())
|
||||
registersUsed->readGPR2 = op_storeLoad.registerMem2;
|
||||
// determine partially written result
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR3 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
|
||||
// PS1 remains the same
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
registersUsed->readGPR3 = op_storeLoad.registerData;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_STORE)
|
||||
{
|
||||
|
@ -300,18 +243,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR1 = op_storeLoad.registerData;
|
||||
if (op_storeLoad.registerMem.IsValid())
|
||||
registersUsed->readGPR2 = op_storeLoad.registerMem;
|
||||
// PSQ generic stores also access GQR
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR3 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
default:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
|
||||
{
|
||||
|
@ -322,72 +253,34 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR2 = op_storeLoad.registerMem;
|
||||
if (op_storeLoad.registerMem2.IsValid())
|
||||
registersUsed->readGPR3 = op_storeLoad.registerMem2;
|
||||
// PSQ generic stores also access GQR
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR4 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
default:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_R_R)
|
||||
{
|
||||
// fpr operation
|
||||
if (operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ||
|
||||
operation == PPCREC_IML_OP_ASSIGN ||
|
||||
operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_ABS_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_FRES_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR)
|
||||
{
|
||||
// operand read, result written
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
}
|
||||
else if (
|
||||
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 ||
|
||||
operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ||
|
||||
operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT
|
||||
if (
|
||||
operation == PPCREC_IML_OP_FPR_ASSIGN ||
|
||||
operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
|
||||
operation == PPCREC_IML_OP_FPR_FCTIWZ
|
||||
)
|
||||
{
|
||||
// operand read, result read and (partially) written
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
registersUsed->readGPR2 = op_fpr_r_r.regR;
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_ADD_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_SUB_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_SUB_BOTTOM)
|
||||
else if (operation == PPCREC_IML_OP_FPR_MULTIPLY ||
|
||||
operation == PPCREC_IML_OP_FPR_DIVIDE ||
|
||||
operation == PPCREC_IML_OP_FPR_ADD ||
|
||||
operation == PPCREC_IML_OP_FPR_SUB)
|
||||
{
|
||||
// operand read, result read and written
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
registersUsed->readGPR2 = op_fpr_r_r.regR;
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_FCMPU_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM)
|
||||
else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
|
||||
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
|
||||
{
|
||||
// operand read, result read
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
registersUsed->readGPR2 = op_fpr_r_r.regR;
|
||||
}
|
||||
else
|
||||
cemu_assert_unimplemented();
|
||||
|
@ -398,19 +291,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR1 = op_fpr_r_r_r.regA;
|
||||
registersUsed->readGPR2 = op_fpr_r_r_r.regB;
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r_r.regR;
|
||||
// handle partially written result
|
||||
switch (operation)
|
||||
{
|
||||
case PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM:
|
||||
case PPCREC_IML_OP_FPR_ADD_BOTTOM:
|
||||
case PPCREC_IML_OP_FPR_SUB_BOTTOM:
|
||||
registersUsed->readGPR3 = op_fpr_r_r_r.regR;
|
||||
break;
|
||||
case PPCREC_IML_OP_FPR_SUB_PAIR:
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R)
|
||||
{
|
||||
|
@ -419,33 +299,23 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR2 = op_fpr_r_r_r_r.regB;
|
||||
registersUsed->readGPR3 = op_fpr_r_r_r_r.regC;
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR;
|
||||
// handle partially written result
|
||||
switch (operation)
|
||||
{
|
||||
case PPCREC_IML_OP_FPR_SELECT_BOTTOM:
|
||||
registersUsed->readGPR4 = op_fpr_r_r_r_r.regR;
|
||||
break;
|
||||
case PPCREC_IML_OP_FPR_SUM0:
|
||||
case PPCREC_IML_OP_FPR_SUM1:
|
||||
case PPCREC_IML_OP_FPR_SELECT_PAIR:
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_R)
|
||||
{
|
||||
// fpr operation
|
||||
if (operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 ||
|
||||
operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR)
|
||||
if (operation == PPCREC_IML_OP_FPR_NEGATE ||
|
||||
operation == PPCREC_IML_OP_FPR_ABS ||
|
||||
operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS ||
|
||||
operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 ||
|
||||
operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
|
||||
{
|
||||
registersUsed->readGPR1 = op_fpr_r.regR;
|
||||
registersUsed->writtenGPR1 = op_fpr_r.regR;
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE)
|
||||
{
|
||||
registersUsed->writtenGPR1 = op_fpr_r.regR;
|
||||
}
|
||||
else
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
@ -608,27 +478,23 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& tr
|
|||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
|
||||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_STORE)
|
||||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
|
||||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_R)
|
||||
{
|
||||
|
|
|
@ -126,46 +126,22 @@ enum
|
|||
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
|
||||
PPCREC_IML_OP_CNTLZW,
|
||||
// FPU
|
||||
PPCREC_IML_OP_FPR_ADD_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_ADD_PAIR,
|
||||
PPCREC_IML_OP_FPR_SUB_PAIR,
|
||||
PPCREC_IML_OP_FPR_SUB_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_MULTIPLY_PAIR,
|
||||
PPCREC_IML_OP_FPR_DIVIDE_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_DIVIDE_PAIR,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP,
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
|
||||
PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
|
||||
PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated
|
||||
PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated
|
||||
PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated
|
||||
PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_NEGATE_PAIR,
|
||||
PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0)
|
||||
PPCREC_IML_OP_FPR_ABS_PAIR,
|
||||
PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy)
|
||||
PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy)
|
||||
PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0)
|
||||
PPCREC_IML_OP_FPR_ASSIGN,
|
||||
PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
|
||||
PPCREC_IML_OP_FPR_ADD,
|
||||
PPCREC_IML_OP_FPR_SUB,
|
||||
PPCREC_IML_OP_FPR_MULTIPLY,
|
||||
PPCREC_IML_OP_FPR_DIVIDE,
|
||||
PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, // expand f32 to f64 in-place
|
||||
PPCREC_IML_OP_FPR_NEGATE,
|
||||
PPCREC_IML_OP_FPR_ABS, // abs(fpr)
|
||||
PPCREC_IML_OP_FPR_NEGATIVE_ABS, // -abs(fpr)
|
||||
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
|
||||
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision
|
||||
PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT,
|
||||
PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
|
||||
PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
|
||||
PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
|
||||
// PS
|
||||
PPCREC_IML_OP_FPR_SUM0,
|
||||
PPCREC_IML_OP_FPR_SUM1,
|
||||
|
||||
|
||||
// R_R_R only
|
||||
|
||||
// R_R_S32 only
|
||||
PPCREC_IML_OP_FPR_FCTIWZ,
|
||||
PPCREC_IML_OP_FPR_SELECT, // selectively copy bottom value from operand B or C based on value in operand A
|
||||
// Conversion (FPR_R_R)
|
||||
PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
|
||||
PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
|
||||
|
||||
// R_R_R + R_R_S32
|
||||
PPCREC_IML_OP_ADD, // also R_R_R_CARRY
|
||||
|
@ -275,7 +251,7 @@ enum // IMLName
|
|||
PPCREC_NAME_TEMPORARY = 1000,
|
||||
PPCREC_NAME_R0 = 2000,
|
||||
PPCREC_NAME_SPR0 = 3000,
|
||||
PPCREC_NAME_FPR0 = 4000,
|
||||
PPCREC_NAME_FPR_HALF = 4800, // Counts PS0 and PS1 separately. E.g. fp3.ps1 is at offset 3 * 2 + 1
|
||||
PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7
|
||||
PPCREC_NAME_XER_CA = 6000, // carry bit from XER
|
||||
PPCREC_NAME_XER_OV = 6001, // overflow bit from XER
|
||||
|
@ -291,39 +267,14 @@ enum // IMLName
|
|||
enum
|
||||
{
|
||||
// fpr load
|
||||
PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0,
|
||||
PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S16_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U16_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S8_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U8_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_SINGLE,
|
||||
PPCREC_FPR_LD_MODE_DOUBLE,
|
||||
|
||||
// fpr store
|
||||
PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0
|
||||
PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0
|
||||
PPCREC_FPR_ST_MODE_SINGLE,
|
||||
PPCREC_FPR_ST_MODE_DOUBLE,
|
||||
|
||||
PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
|
||||
|
||||
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S8_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U8_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U16_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S16_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1,
|
||||
};
|
||||
|
||||
struct IMLUsedRegisters
|
||||
|
@ -463,7 +414,6 @@ struct IMLInstruction
|
|||
IMLReg registerData;
|
||||
IMLReg registerMem;
|
||||
IMLReg registerMem2;
|
||||
IMLReg registerGQR;
|
||||
uint8 copyWidth;
|
||||
struct
|
||||
{
|
||||
|
@ -471,7 +421,7 @@ struct IMLInstruction
|
|||
bool signExtend : 1;
|
||||
bool notExpanded : 1; // for floats
|
||||
}flags2;
|
||||
uint8 mode; // transfer mode (copy width, ps0/ps1 behavior)
|
||||
uint8 mode; // transfer mode
|
||||
sint32 immS32;
|
||||
}op_storeLoad;
|
||||
struct
|
||||
|
@ -752,6 +702,56 @@ struct IMLInstruction
|
|||
this->op_call_imm.regReturn = regReturn;
|
||||
}
|
||||
|
||||
// FPR
|
||||
|
||||
// load from memory
|
||||
void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_LOAD;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerDestination;
|
||||
this->op_storeLoad.registerMem = registerMemory;
|
||||
this->op_storeLoad.immS32 = immS32;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
}
|
||||
|
||||
void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerDestination;
|
||||
this->op_storeLoad.registerMem = registerMemory1;
|
||||
this->op_storeLoad.registerMem2 = registerMemory2;
|
||||
this->op_storeLoad.immS32 = 0;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
}
|
||||
|
||||
// store to memory
|
||||
void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_STORE;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerSource;
|
||||
this->op_storeLoad.registerMem = registerMemory;
|
||||
this->op_storeLoad.immS32 = immS32;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
}
|
||||
|
||||
void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerSource;
|
||||
this->op_storeLoad.registerMem = registerMemory1;
|
||||
this->op_storeLoad.registerMem2 = registerMemory2;
|
||||
this->op_storeLoad.immS32 = immS32;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
}
|
||||
|
||||
void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_COMPARE;
|
||||
|
@ -762,6 +762,44 @@ struct IMLInstruction
|
|||
this->op_fpr_compare.cond = cond;
|
||||
}
|
||||
|
||||
void make_fpr_r(sint32 operation, IMLReg registerResult)
|
||||
{
|
||||
// OP (fpr)
|
||||
this->type = PPCREC_IML_TYPE_FPR_R;
|
||||
this->operation = operation;
|
||||
this->op_fpr_r.regR = registerResult;
|
||||
}
|
||||
|
||||
void make_fpr_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER)
|
||||
{
|
||||
// fpr OP fpr
|
||||
this->type = PPCREC_IML_TYPE_FPR_R_R;
|
||||
this->operation = operation;
|
||||
this->op_fpr_r_r.regR = registerResult;
|
||||
this->op_fpr_r_r.regA = registerOperand;
|
||||
}
|
||||
|
||||
void make_fpr_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER)
|
||||
{
|
||||
// fpr = OP (fpr,fpr)
|
||||
this->type = PPCREC_IML_TYPE_FPR_R_R_R;
|
||||
this->operation = operation;
|
||||
this->op_fpr_r_r_r.regR = registerResult;
|
||||
this->op_fpr_r_r_r.regA = registerOperand1;
|
||||
this->op_fpr_r_r_r.regB = registerOperand2;
|
||||
}
|
||||
|
||||
void make_fpr_r_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER)
|
||||
{
|
||||
// fpr = OP (fpr,fpr,fpr)
|
||||
this->type = PPCREC_IML_TYPE_FPR_R_R_R_R;
|
||||
this->operation = operation;
|
||||
this->op_fpr_r_r_r_r.regR = registerResult;
|
||||
this->op_fpr_r_r_r_r.regA = registerOperandA;
|
||||
this->op_fpr_r_r_r_r.regB = registerOperandB;
|
||||
this->op_fpr_r_r_r_r.regC = registerOperandC;
|
||||
}
|
||||
|
||||
/* X86 specific */
|
||||
void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition)
|
||||
{
|
||||
|
|
|
@ -34,8 +34,8 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
if (imlInstruction->IsSuffixInstruction())
|
||||
break;
|
||||
// check if FPR is stored
|
||||
if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0) ||
|
||||
(imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0))
|
||||
if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE) ||
|
||||
(imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE))
|
||||
{
|
||||
if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex)
|
||||
{
|
||||
|
@ -73,7 +73,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
{
|
||||
// insert expand instruction after store
|
||||
IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore);
|
||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, _FPRRegFromID(fprIndex));
|
||||
newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -90,21 +90,23 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
*/
|
||||
void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
for (sint32 i = 0; i < segIt->imlList.size(); i++)
|
||||
{
|
||||
IMLInstruction* imlInstruction = segIt->imlList.data() + i;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
}
|
||||
}
|
||||
cemuLog_logDebugOnce(LogType::Force, "IMLOptimizer_OptimizeDirectFloatCopies(): Currently disabled\n");
|
||||
return;
|
||||
// for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
// {
|
||||
// for (sint32 i = 0; i < segIt->imlList.size(); i++)
|
||||
// {
|
||||
// IMLInstruction* imlInstruction = segIt->imlList.data() + i;
|
||||
// if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
// {
|
||||
// PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
// }
|
||||
// else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
// {
|
||||
// PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
|
||||
|
@ -207,133 +209,22 @@ sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqr
|
|||
|
||||
bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue)
|
||||
{
|
||||
// UGQR 2 to 7 are initialized by the OS and we assume that games won't ever permanently touch those
|
||||
// todo - hack - replace with more accurate solution
|
||||
if (gqrIndex == 2)
|
||||
gqrValue = 0x00040004;
|
||||
else if (gqrIndex == 3)
|
||||
gqrValue = 0x00050005;
|
||||
else if (gqrIndex == 4)
|
||||
gqrValue = 0x00060006;
|
||||
else if (gqrIndex == 5)
|
||||
gqrValue = 0x00070007;
|
||||
// the default configuration is:
|
||||
// UGQR0 = 0x00000000
|
||||
// UGQR2 = 0x00040004
|
||||
// UGQR3 = 0x00050005
|
||||
// UGQR4 = 0x00060006
|
||||
// UGQR5 = 0x00070007
|
||||
// but games are free to modify UGQR2 to UGQR7 it seems.
|
||||
// no game modifies UGQR0 so it's safe enough to optimize for the default value
|
||||
// Ideally we would do some kind of runtime tracking and second recompilation to create fast paths for PSQ_L/PSQ_ST but thats todo
|
||||
if (gqrIndex == 0)
|
||||
gqrValue = 0x00000000;
|
||||
else
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If value of GQR can be predicted for a given PSQ load or store instruction then replace it with an optimized version
|
||||
*/
|
||||
void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
for(IMLInstruction& instIt : segIt->imlList)
|
||||
{
|
||||
if (instIt.type == PPCREC_IML_TYPE_FPR_LOAD || instIt.type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
|
||||
{
|
||||
if(instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 &&
|
||||
instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 )
|
||||
continue;
|
||||
// get GQR value
|
||||
cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid());
|
||||
sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR);
|
||||
cemu_assert(gqrIndex >= 0);
|
||||
if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex])
|
||||
continue;
|
||||
uint32 gqrValue;
|
||||
if (!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue))
|
||||
continue;
|
||||
|
||||
uint32 formatType = (gqrValue >> 16) & 7;
|
||||
uint32 scale = (gqrValue >> 24) & 0x3F;
|
||||
if (scale != 0)
|
||||
continue; // only generic handler supports scale
|
||||
if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
}
|
||||
else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
|
||||
{
|
||||
if(instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0 &&
|
||||
instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
continue;
|
||||
// get GQR value
|
||||
cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid());
|
||||
sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR);
|
||||
cemu_assert(gqrIndex >= 0 && gqrIndex < 8);
|
||||
if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex])
|
||||
continue;
|
||||
uint32 gqrValue;
|
||||
if(!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue))
|
||||
continue;
|
||||
uint32 formatType = (gqrValue >> 16) & 7;
|
||||
uint32 scale = (gqrValue >> 24) & 0x3F;
|
||||
if (scale != 0)
|
||||
continue; // only generic handler supports scale
|
||||
if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// analyses register dependencies across the entire function
|
||||
// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten)
|
||||
class IMLOptimizerRegIOAnalysis
|
||||
|
|
|
@ -2093,7 +2093,10 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM
|
|||
cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction());
|
||||
if (imlSegment->HasSuffixInstruction())
|
||||
{
|
||||
cemu_assert_debug(!currentRange); // currentRange should be NULL?
|
||||
if (currentRange)
|
||||
{
|
||||
cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress);
|
||||
}
|
||||
for (auto& remainingRange : activeRanges)
|
||||
{
|
||||
cemu_assert_debug(!remainingRange->hasStore);
|
||||
|
|
|
@ -311,10 +311,7 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
|
|||
// this simplifies logic during register allocation
|
||||
PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);
|
||||
|
||||
// if GQRs can be predicted, optimize PSQ load/stores
|
||||
PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext);
|
||||
|
||||
// merge certain float load+store patterns (must happen before FPR register remapping)
|
||||
// merge certain float load+store patterns
|
||||
IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext);
|
||||
// delay byte swapping for certain load+store patterns
|
||||
IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext);
|
||||
|
|
|
@ -14,34 +14,20 @@ void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint3
|
|||
void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index);
|
||||
void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint);
|
||||
|
||||
// GPR register management
|
||||
IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
|
||||
// Register management
|
||||
IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat);
|
||||
|
||||
// FPR register management
|
||||
IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
|
||||
IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
|
||||
IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName);
|
||||
|
||||
// IML instruction generation
|
||||
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
|
||||
void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, IMLReg registerResult);
|
||||
|
||||
// IML generation - FPU
|
||||
bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
|
||||
bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
|
||||
bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble);
|
||||
bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble);
|
||||
bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
|
@ -67,22 +53,17 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
|
||||
bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate);
|
||||
bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
|
||||
bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1);
|
||||
bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative);
|
||||
bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode);
|
||||
|
@ -102,3 +83,19 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o
|
|||
// IML general
|
||||
|
||||
void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
||||
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken);
|
||||
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken); // no else segment
|
||||
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex);
|
||||
|
||||
class IMLRedirectInstOutput
|
||||
{
|
||||
public:
|
||||
IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment);
|
||||
~IMLRedirectInstOutput();
|
||||
|
||||
|
||||
private:
|
||||
ppcImlGenContext_t* m_context;
|
||||
IMLSegment* m_prevSegment;
|
||||
};
|
|
@ -87,8 +87,7 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex
|
|||
}
|
||||
|
||||
// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards
|
||||
template<typename F1n, typename F2n>
|
||||
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken)
|
||||
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchTaken, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken)
|
||||
{
|
||||
IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
|
||||
|
||||
|
@ -118,6 +117,122 @@ void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, P
|
|||
basicBlockInfo.appendSegment = segMerge;
|
||||
}
|
||||
|
||||
// Overload without an else-path. Two segments are inserted behind the current write segment:
// one that is only reached when the branch of the current segment is not taken, and a merge
// segment where both paths meet again. The merge segment becomes the new write segment.
void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function<void(ppcImlGenContext_t&)>& genSegmentBranchNotTaken)
{
	IMLSegment* const originSeg = basicBlockInfo.GetSegmentForInstructionAppend();

	const auto insertPos = ppcImlGenContext.GetSegmentIndex(originSeg) + 1;
	std::span<IMLSegment*> insertedSegs = ppcImlGenContext.InsertSegments(insertPos, 2);
	IMLSegment* const fallthroughSeg = insertedSegs[0];
	IMLSegment* const joinSeg = insertedSegs[1];

	// the join segment inherits the outgoing links of the origin segment
	joinSeg->SetLinkBranchTaken(originSeg->GetBranchTaken());
	joinSeg->SetLinkBranchNotTaken(originSeg->GetBranchNotTaken());
	// origin: taken -> join (skips the conditional code), not taken -> conditional code
	originSeg->SetLinkBranchTaken(joinSeg);
	originSeg->SetLinkBranchNotTaken(fallthroughSeg);
	// the conditional code falls through into the join segment
	fallthroughSeg->SetLinkBranchNotTaken(joinSeg);

	// let the callback emit the instructions of the not-taken path
	ppcImlGenContext.currentOutputSegment = fallthroughSeg;
	genSegmentBranchNotTaken(ppcImlGenContext);
	// the callback must leave the output segment unchanged
	cemu_assert_debug(ppcImlGenContext.currentOutputSegment == fallthroughSeg);

	// everything that follows is written to the join segment
	ppcImlGenContext.currentOutputSegment = joinSeg;
	basicBlockInfo.appendSegment = joinSeg;
}
|
||||
|
||||
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index);
|
||||
|
||||
// Redirects instruction output of the context to outputSegment.
// If outputSegment already is the current output segment nothing is changed and the destructor becomes a no-op.
IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext)
{
	IMLSegment* const activeSegment = m_context->currentOutputSegment;
	// output segment and append segment of the basic block are expected to be in sync
	cemu_assert_debug(activeSegment == m_context->currentBasicBlock->appendSegment);

	const bool needsRedirect = (outputSegment != activeSegment);
	// remember the previous segment only if it actually has to be restored later
	m_prevSegment = needsRedirect ? activeSegment : nullptr;
	if (needsRedirect)
	{
		m_context->currentBasicBlock->appendSegment = outputSegment;
		m_context->currentOutputSegment = outputSegment;
	}
}
|
||||
|
||||
// Switches output back to the segment that was active when the object was constructed
IMLRedirectInstOutput::~IMLRedirectInstOutput()
{
	if (m_prevSegment == nullptr)
		return; // the constructor did not redirect anything
	m_context->currentBasicBlock->appendSegment = m_prevSegment;
	m_context->currentOutputSegment = m_prevSegment;
}
|
||||
|
||||
// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. Thus compareValues is one shorter than count
|
||||
void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex)
|
||||
{
|
||||
IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend();
|
||||
cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction
|
||||
|
||||
const sint32 numBranchSegments = count + 1;
|
||||
const sint32 numCaseSegments = count;
|
||||
|
||||
std::span<IMLSegment*> segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1);
|
||||
IMLSegment** extraBranchSegments = segments.data();
|
||||
IMLSegment** caseSegments = segments.data() + numBranchSegments - 1;
|
||||
IMLSegment* mergeSegment = segments[numBranchSegments - 1 + numCaseSegments];
|
||||
|
||||
// move links to the merge segment
|
||||
mergeSegment->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken());
|
||||
mergeSegment->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken());
|
||||
currentWriteSegment->SetLinkBranchTaken(nullptr);
|
||||
currentWriteSegment->SetLinkBranchNotTaken(nullptr);
|
||||
|
||||
for (sint32 i=0; i<numCaseSegments; i++)
|
||||
segmentsOut[i] = caseSegments[i];
|
||||
|
||||
IMLReg tmpBoolReg = _GetRegTemporaryS8(&ppcImlGenContext, 2);
|
||||
|
||||
// the first branch segment is the original current write segment
|
||||
auto GetBranchSegment = [&](sint32 index) {
|
||||
if (index == 0)
|
||||
return currentWriteSegment;
|
||||
else
|
||||
return extraBranchSegments[index - 1];
|
||||
};
|
||||
// link branch segments (taken: Link to case segment. NotTaken: Link to next branch segment. For the last one use a non-conditional jump)
|
||||
for (sint32 i=0; i<numBranchSegments; i++)
|
||||
{
|
||||
IMLSegment* seg = GetBranchSegment(i);
|
||||
if (i < numBranchSegments - 1)
|
||||
{
|
||||
cemu_assert_debug(i < numCaseSegments);
|
||||
seg->SetLinkBranchTaken(caseSegments[i]);
|
||||
seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1));
|
||||
seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ);
|
||||
seg->AppendInstruction()->make_conditional_jump(tmpBoolReg, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
cemu_assert_debug(defaultCaseIndex < numCaseSegments);
|
||||
seg->SetLinkBranchTaken(caseSegments[defaultCaseIndex]);
|
||||
seg->AppendInstruction()->make_jump();
|
||||
}
|
||||
}
|
||||
// link case segments
|
||||
for (sint32 i=0; i<numCaseSegments; i++)
|
||||
{
|
||||
IMLSegment* seg = caseSegments[i];
|
||||
if (i < numCaseSegments - 1)
|
||||
{
|
||||
seg->SetLinkBranchTaken(mergeSegment);
|
||||
// -> Jumps are added after the instructions
|
||||
}
|
||||
else
|
||||
{
|
||||
seg->SetLinkBranchTaken(mergeSegment);
|
||||
}
|
||||
}
|
||||
ppcImlGenContext.currentOutputSegment = mergeSegment;
|
||||
basicBlockInfo.appendSegment = mergeSegment;
|
||||
}
|
||||
|
||||
IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat)
|
||||
{
|
||||
auto it = ppcImlGenContext->mappedRegs.find(mappedName);
|
||||
|
@ -212,32 +327,14 @@ IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
|
|||
return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index);
|
||||
}
|
||||
|
||||
// get throw-away register. Only valid for the scope of a single translated instruction
|
||||
// be careful to not collide with manually loaded temporary register
|
||||
// get throw-away register
|
||||
// be careful to not collide with other temporary register
|
||||
IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index)
{
	// only the temporaries 0-3 may be requested through this helper
	cemu_assert_debug(index < 4);
	const auto temporaryName = PPCREC_NAME_TEMPORARY + index;
	return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, temporaryName);
}
|
||||
|
||||
/*
|
||||
* Loads a PPC fpr into any of the available IML FPU registers
|
||||
* If loadNew is false, it will check first if the fpr is already loaded into any IML register
|
||||
*/
|
||||
IMLReg PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew)
|
||||
{
|
||||
return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64);
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks if a PPC fpr register is already loaded into any IML register
|
||||
* If not, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register)
|
||||
*/
|
||||
IMLReg PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName)
|
||||
{
|
||||
return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::F64);
|
||||
}
|
||||
|
||||
bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount)
|
||||
{
|
||||
for (sint32 i = 0; i < 6; i++)
|
||||
|
@ -1050,15 +1147,15 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
|
||||
// load masked shift factor into temporary register
|
||||
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmpShiftAmount, regB, 0x3F);
|
||||
ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 32, regTmpCondBool, IMLCondition::UNSIGNED_GT);
|
||||
ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 31, regTmpCondBool, IMLCondition::UNSIGNED_GT);
|
||||
ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true);
|
||||
|
||||
PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock,
|
||||
[&](ppcImlGenContext_t& genCtx)
|
||||
{
|
||||
/* branch taken */
|
||||
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, regTmpShiftAmount);
|
||||
genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry
|
||||
/* branch taken, shift size 32 or above */
|
||||
genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, 31); // shift the sign bit into all the bits
|
||||
genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ);
|
||||
},
|
||||
[&](ppcImlGenContext_t& genCtx)
|
||||
{
|
||||
|
@ -1073,6 +1170,8 @@ bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, regTmpShiftAmount);
|
||||
}
|
||||
);
|
||||
if (opcode & PPC_OPC_RC)
|
||||
PPCImlGen_UpdateCR0(ppcImlGenContext, regA);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1909,23 +2008,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 12: // multiply scalar
|
||||
if (PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext, opcode) == false)
|
||||
case 12: // PS_MULS0
|
||||
if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 13: // multiply scalar
|
||||
if (PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext, opcode) == false)
|
||||
case 13: // PS_MULS1
|
||||
if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 14: // multiply add scalar
|
||||
if (PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext, opcode) == false)
|
||||
case 14: // PS_MADDS0
|
||||
if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 15: // multiply add scalar
|
||||
if (PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext, opcode) == false)
|
||||
case 15: // PS_MADDS1
|
||||
if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -1992,22 +2091,22 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 28: // multiply sub paired
|
||||
if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode) == false)
|
||||
case 28: // PS_MSUB
|
||||
if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 29: // multiply add paired
|
||||
case 29: // PS_MADD
|
||||
if (PPCRecompilerImlGen_PS_MADD(ppcImlGenContext, opcode) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 30: // negative multiply sub paired
|
||||
if (PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext, opcode) == false)
|
||||
case 30: // PS_NMSUB
|
||||
if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 31: // negative multiply add paired
|
||||
case 31: // PS_NMADD
|
||||
if (PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext, opcode) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
|
@ -2339,8 +2438,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
case 534: // LWBRX
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false);
|
||||
break;
|
||||
case 535:
|
||||
if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false)
|
||||
case 535: // LFSX
|
||||
if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2348,8 +2447,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
if (PPCRecompilerImlGen_SRW(ppcImlGenContext, opcode) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 567:
|
||||
if (PPCRecompilerImlGen_LFSUX(ppcImlGenContext, opcode) == false)
|
||||
case 567: // LFSUX
|
||||
if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2360,13 +2459,13 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
case 598:
|
||||
PPCRecompilerImlGen_SYNC(ppcImlGenContext, opcode);
|
||||
break;
|
||||
case 599:
|
||||
if (PPCRecompilerImlGen_LFDX(ppcImlGenContext, opcode) == false)
|
||||
case 599: // LFDX
|
||||
if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 631:
|
||||
if (PPCRecompilerImlGen_LFDUX(ppcImlGenContext, opcode) == false)
|
||||
case 631: // LFDUX
|
||||
if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2374,20 +2473,24 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, false, false))
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 663:
|
||||
if (PPCRecompilerImlGen_STFSX(ppcImlGenContext, opcode) == false)
|
||||
case 663: // STFSX
|
||||
if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 695:
|
||||
if (PPCRecompilerImlGen_STFSUX(ppcImlGenContext, opcode) == false)
|
||||
case 695: // STFSUX
|
||||
if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 725:
|
||||
if (PPCRecompilerImlGen_STSWI(ppcImlGenContext, opcode) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 727:
|
||||
if (PPCRecompilerImlGen_STFDX(ppcImlGenContext, opcode) == false)
|
||||
case 727: // STFDX
|
||||
if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 759: // STFDUX
|
||||
if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 790: // LHBRX
|
||||
|
@ -2488,53 +2591,53 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
case 47:
|
||||
PPCRecompilerImlGen_STMW(ppcImlGenContext, opcode);
|
||||
break;
|
||||
case 48:
|
||||
if (PPCRecompilerImlGen_LFS(ppcImlGenContext, opcode) == false)
|
||||
case 48: // LFS
|
||||
if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 49:
|
||||
if (PPCRecompilerImlGen_LFSU(ppcImlGenContext, opcode) == false)
|
||||
case 49: // LFSU
|
||||
if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 50:
|
||||
if (PPCRecompilerImlGen_LFD(ppcImlGenContext, opcode) == false)
|
||||
case 50: // LFD
|
||||
if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 51:
|
||||
if (PPCRecompilerImlGen_LFDU(ppcImlGenContext, opcode) == false)
|
||||
case 51: // LFDU
|
||||
if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 52:
|
||||
if (PPCRecompilerImlGen_STFS(ppcImlGenContext, opcode) == false)
|
||||
case 52: // STFS
|
||||
if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 53:
|
||||
if (PPCRecompilerImlGen_STFSU(ppcImlGenContext, opcode) == false)
|
||||
case 53: // STFSU
|
||||
if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 54:
|
||||
if (PPCRecompilerImlGen_STFD(ppcImlGenContext, opcode) == false)
|
||||
case 54: // STFD
|
||||
if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 55:
|
||||
if (PPCRecompilerImlGen_STFDU(ppcImlGenContext, opcode) == false)
|
||||
case 55: // STFDU
|
||||
if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 56:
|
||||
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 57:
|
||||
if (PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2587,12 +2690,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
}
|
||||
break;
|
||||
case 60:
|
||||
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, false) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
case 61:
|
||||
if (PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext, opcode) == false)
|
||||
if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, true) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
ppcImlGenContext->hasFPUInstruction = true;
|
||||
break;
|
||||
|
@ -2702,7 +2805,6 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
}
|
||||
|
||||
// returns false if code flow is not interrupted
|
||||
// continueDefaultPath: Controls if
|
||||
bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget)
|
||||
{
|
||||
hasBranchTarget = false;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,53 +0,0 @@
|
|||
# Builds the CemuAsm library.
# x86-64: assembles x64util_masm.asm (Windows, MASM) or x64util_nasm.asm (other platforms, NASM)
# everything else: builds stub.cpp so that the CemuAsm target always exists for targets linking it
project(CemuAsm C)

# on macOS the requested target architecture takes precedence over the host processor
if (CMAKE_OSX_ARCHITECTURES)
	set(CEMU_ASM_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES})
else()
	set(CEMU_ASM_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
endif()

if (CEMU_ASM_ARCHITECTURE MATCHES "(x86)|(X86)|(amd64)|(AMD64)")

	if (WIN32)

		enable_language(C ASM_MASM)

		add_library(CemuAsm x64util_masm.asm)
		set_source_files_properties(x64util_masm.asm PROPERTIES LANGUAGE ASM_MASM)

		# workaround for cr flag being passed to LINK.exe which considers it an input file and thus fails
		# doesn't always seem to happen. The Windows CI builds were fine, but locally I would run into this problem
		# possibly related to https://gitlab.kitware.com/cmake/cmake/-/issues/18889
		set(CMAKE_ASM_MASM_CREATE_STATIC_LIBRARY "<CMAKE_AR> /OUT:<TARGET> <LINK_FLAGS> <OBJECTS>")

		set_property(TARGET CemuAsm PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

	else()

		# NASM
		# the compile/link rules have to be set before the language is enabled
		if (APPLE)
			set(CMAKE_ASM_NASM_COMPILE_OBJECT "<CMAKE_ASM_NASM_COMPILER> -g -Fdwarf -f macho64 --prefix _ -o <OBJECT> <SOURCE>")
		else()
			set(CMAKE_ASM_NASM_COMPILE_OBJECT "<CMAKE_ASM_NASM_COMPILER> -g -Fdwarf -f elf64 -o <OBJECT> <SOURCE>")
		endif()
		set(CMAKE_ASM_NASM_LINK_EXECUTABLE "ld <FLAGS> <CMAKE_ASM_NASM_LINK_FLAGS> <LINK_FLAGS> -fPIC <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")

		enable_language(C ASM_NASM)

		add_library(CemuAsm x64util_nasm.asm)
		set_source_files_properties(x64util_nasm.asm PROPERTIES LANGUAGE ASM_NASM)

		if (APPLE)
			set_target_properties(CemuAsm PROPERTIES NASM_OBJ_FORMAT macho64)
		else()
			set_target_properties(CemuAsm PROPERTIES NASM_OBJ_FORMAT elf64)
		endif()
		set_target_properties(CemuAsm PROPERTIES LINKER_LANGUAGE C)

	endif()

elseif(CEMU_ASM_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
	add_library(CemuAsm stub.cpp)
else()
	# Without a CemuAsm target, targets linking against it would fail later with an obscure linker error.
	# Fall back to the stub library (same as on ARM64) and make the situation visible.
	message(WARNING "CemuAsm - Unsupported arch: ${CEMU_ASM_ARCHITECTURE}, building stub library")
	add_library(CemuAsm stub.cpp)
endif()
|
|
@ -1 +0,0 @@
|
|||
|
|
@ -1,20 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
|
||||
extern "C" void recompiler_fres();
|
||||
extern "C" void recompiler_frsqrte();
|
||||
|
||||
#else
|
||||
|
||||
// stubbed on non-x86 for now
|
||||
static void recompiler_fres()
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
static void recompiler_frsqrte()
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,233 +0,0 @@
|
|||
.code
|
||||
|
||||
; recompiler_fres
; Register-based helper: the 64-bit operand is read from the low quadword of xmm15 and the
; result is written back to xmm15. rdx, rcx, rax and r8 are saved on entry and restored before
; every ret; the flags are not preserved.
; The operand is treated as a double bit pattern (sign: bit 63, exponent: bits 52-62, mantissa: bits 0-51).
; NOTE(review): judging by the name this implements the PowerPC fres (reciprocal estimate) instruction - confirm
recompiler_fres PROC
|
||||
; store all modified registers
|
||||
push rdx
|
||||
push rcx
|
||||
push rax
|
||||
push r8
|
||||
; r8 = base of the lookup table, rdx = raw operand bits
|
||||
lea r8,[asmFresLookupTable]
|
||||
movq rdx, xmm15
|
||||
; ecx = operand bits 47-51 (selects the table row), eax = operand bits 37-46
|
||||
mov rcx,rdx
|
||||
shr rcx,2Fh
|
||||
mov rax,rdx
|
||||
and ecx,1Fh
|
||||
shr rax,25h
|
||||
and eax,3FFh
|
||||
; eax *= second dword of the table row, r8d = first dword of the table row
|
||||
imul eax,dword ptr [r8+rcx*8+4]
|
||||
mov r8d,dword ptr [r8+rcx*8]
|
||||
; r8d -= (eax + 1) >> 1, interleaved with loading the exponent field (bits 52-62) into ecx
|
||||
mov rcx,rdx
|
||||
shr rcx,34h
|
||||
inc eax
|
||||
shr eax,1
|
||||
sub r8d,eax
|
||||
and ecx,7FFh
|
||||
; flags come from the 'and' above: a non-zero exponent field continues at label3
|
||||
jne fres_espresso_label3
|
||||
; exponent field is zero: set all exponent bits of the operand (sign and mantissa bits are kept) and return
|
||||
mov rax,7FF0000000000000h
|
||||
or rdx,rax
|
||||
movq xmm15, rdx
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
fres_espresso_label3:
|
||||
; exponent field not all ones -> regular path (label4)
|
||||
cmp ecx,7FFh
|
||||
jne fres_espresso_label4
|
||||
; exponent field is all ones: if any mantissa bit is set, return with xmm15 untouched (label1)
|
||||
mov rax,0FFFFFFFFFFFFFh
|
||||
test rax,rdx
|
||||
jne fres_espresso_label1
|
||||
; mantissa is zero: the result depends on the sign bit only
|
||||
test rdx,rdx
|
||||
jns fres_espresso_label2
|
||||
; sign bit set: result is 8000000000000000h
|
||||
mov rax,8000000000000000h
|
||||
movq xmm15, rax
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
fres_espresso_label2:
|
||||
; sign bit clear: result is zero (xorps clears the whole register)
|
||||
xorps xmm15,xmm15
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
fres_espresso_label4:
|
||||
; regular path: result = ((7FDh - exponent) << 52 | sign bit) + (r8d << 29)
|
||||
mov eax,7FDh
|
||||
sub eax,ecx
|
||||
mov ecx,eax
|
||||
mov rax,8000000000000000h
|
||||
and rdx,rax
|
||||
shl rcx,34h
|
||||
mov eax,r8d
|
||||
or rcx,rdx
|
||||
shl rax,1Dh
|
||||
add rcx,rax
|
||||
movq xmm15, rcx
|
||||
fres_espresso_label1:
|
||||
; common epilogue: restore the saved registers
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
|
||||
recompiler_fres ENDP
|
||||
|
||||
; Lookup table used by recompiler_fres: 32 rows of two dwords each (8 bytes per row).
; recompiler_fres selects a row with a 5-bit index and reads the first dword as base value
; ([table+index*8]) and the second dword as factor ([table+index*8+4]).
asmFresLookupTable:
|
||||
DD 07ff800h, 03e1h
|
||||
DD 0783800h, 03a7h
|
||||
DD 070ea00h, 0371h
|
||||
DD 06a0800h, 0340h
|
||||
DD 0638800h, 0313h
|
||||
DD 05d6200h, 02eah
|
||||
DD 0579000h, 02c4h
|
||||
DD 0520800h, 02a0h
|
||||
DD 04cc800h, 027fh
|
||||
DD 047ca00h, 0261h
|
||||
DD 0430800h, 0245h
|
||||
DD 03e8000h, 022ah
|
||||
DD 03a2c00h, 0212h
|
||||
DD 0360800h, 01fbh
|
||||
DD 0321400h, 01e5h
|
||||
DD 02e4a00h, 01d1h
|
||||
DD 02aa800h, 01beh
|
||||
DD 0272c00h, 01ach
|
||||
DD 023d600h, 019bh
|
||||
DD 0209e00h, 018bh
|
||||
DD 01d8800h, 017ch
|
||||
DD 01a9000h, 016eh
|
||||
DD 017ae00h, 015bh
|
||||
DD 014f800h, 015bh
|
||||
DD 0124400h, 0143h
|
||||
DD 0fbe00h, 0143h
|
||||
DD 0d3800h, 012dh
|
||||
DD 0ade00h, 012dh
|
||||
DD 088400h, 011ah
|
||||
DD 065000h, 011ah
|
||||
DD 041c00h, 0108h
|
||||
DD 020c00h, 0106h
|
||||
|
||||
; recompiler_frsqrte
; Register-based helper: the 64-bit operand is read from the low quadword of xmm15 and the
; result is written back to xmm15. rdx, rcx, rax, r8 and r9 are saved on entry and restored
; before every ret; the flags are not preserved.
; The operand is treated as a double bit pattern (sign: bit 63, exponent: bits 52-62, mantissa: bits 0-51).
; NOTE(review): judging by the name this implements the PowerPC frsqrte (reciprocal square root estimate) instruction - confirm
recompiler_frsqrte PROC
|
||||
; store all modified registers
|
||||
push rdx
|
||||
push rcx
|
||||
push rax
|
||||
push r8
|
||||
push r9
|
||||
; r8 = raw operand bits
|
||||
movq r8, xmm15
|
||||
; any bit besides the sign bit set? -> label1
|
||||
mov rax,7FFFFFFFFFFFFFFFh
|
||||
test rax,r8
|
||||
jne frsqrte_espresso_label1
|
||||
; operand is +0 or -0: keep bits 52-63, set all exponent bits and return
|
||||
mov rax,0FFF0000000000000h
|
||||
and r8,rax
|
||||
mov rax,7FF0000000000000h
|
||||
or r8,rax
|
||||
movq xmm15, r8
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
frsqrte_espresso_label1:
|
||||
; r9d = exponent field (bits 52-62)
|
||||
mov r9,r8
|
||||
shr r9,34h
|
||||
and r9d,7FFh
|
||||
; exponent field not all ones -> label2
|
||||
cmp r9d,7FFh
|
||||
jne frsqrte_espresso_label2
|
||||
; exponent field is all ones: if any mantissa bit is set, return with xmm15 untouched (label3)
|
||||
mov rax,0FFFFFFFFFFFFFh
|
||||
test rax,r8
|
||||
jne frsqrte_espresso_label3
|
||||
; mantissa is zero: sign bit set -> label4, otherwise the result is zero
|
||||
test r8,r8
|
||||
js frsqrte_espresso_label4
|
||||
xorps xmm15,xmm15
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
frsqrte_espresso_label2:
|
||||
; sign bit clear -> regular path (label5), sign bit set falls through to label4
|
||||
test r8,r8
|
||||
jns frsqrte_espresso_label5
|
||||
frsqrte_espresso_label4:
|
||||
; result is the constant 7FF8000000000000h
|
||||
mov rax,7FF8000000000000h
|
||||
movq xmm15, rax
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
frsqrte_espresso_label5:
|
||||
; regular path
|
||||
; eax = operand bits 48-52 (selects the table row), ecx = operand bits 37-47
|
||||
lea rdx,[asmFrsqrteLookupTable]
|
||||
mov rax,r8
|
||||
shr rax,30h
|
||||
mov rcx,r8
|
||||
shr rcx,25h
|
||||
and eax,1Fh
|
||||
and ecx,7FFh
|
||||
; eax = first dword of the table row - ecx * second dword of the table row
|
||||
imul ecx,dword ptr [rdx+rax*8+4]
|
||||
mov eax,dword ptr [rdx+rax*8]
|
||||
sub eax,ecx
|
||||
; ecx = (exponent - 3FDh) >> 1 (logical shift)
|
||||
lea ecx,[r9-3FDh]
|
||||
shr ecx,1
|
||||
; rdx = sign-extended table value << 26, new exponent = 3FFh - ecx
|
||||
movsxd rdx,eax
|
||||
mov eax,3FFh
|
||||
sub eax,ecx
|
||||
shl rdx,1Ah
|
||||
mov ecx,eax
|
||||
; result = (new exponent << 52 | sign bit) + rdx
|
||||
mov rax,8000000000000000h
|
||||
and r8,rax
|
||||
shl rcx,34h
|
||||
or rcx,r8
|
||||
add rdx,rcx
|
||||
movq xmm15, rdx
|
||||
frsqrte_espresso_label3:
|
||||
; common epilogue: restore the saved registers
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
|
||||
recompiler_frsqrte ENDP
|
||||
|
||||
; Lookup table used by recompiler_frsqrte: 32 rows of two dwords each (8 bytes per row).
; recompiler_frsqrte selects a row with a 5-bit index and reads the first dword as base value
; ([table+index*8]) and the second dword as factor ([table+index*8+4]).
asmFrsqrteLookupTable:
|
||||
DD 01a7e800h, 0568h
|
||||
DD 017cb800h, 04f3h
|
||||
DD 01552800h, 048dh
|
||||
DD 0130c000h, 0435h
|
||||
DD 010f2000h, 03e7h
|
||||
DD 0eff000h, 03a2h
|
||||
DD 0d2e000h, 0365h
|
||||
DD 0b7c000h, 032eh
|
||||
DD 09e5000h, 02fch
|
||||
DD 0867000h, 02d0h
|
||||
DD 06ff000h, 02a8h
|
||||
DD 05ab800h, 0283h
|
||||
DD 046a000h, 0261h
|
||||
DD 0339800h, 0243h
|
||||
DD 0218800h, 0226h
|
||||
DD 0105800h, 020bh
|
||||
DD 03ffa000h, 07a4h
|
||||
DD 03c29000h, 0700h
|
||||
DD 038aa000h, 0670h
|
||||
DD 03572000h, 05f2h
|
||||
DD 03279000h, 0584h
|
||||
DD 02fb7000h, 0524h
|
||||
DD 02d26000h, 04cch
|
||||
DD 02ac0000h, 047eh
|
||||
DD 02881000h, 043ah
|
||||
DD 02665000h, 03fah
|
||||
DD 02468000h, 03c2h
|
||||
DD 02287000h, 038eh
|
||||
DD 020c1000h, 035eh
|
||||
DD 01f12000h, 0332h
|
||||
DD 01d79000h, 030ah
|
||||
DD 01bf4000h, 02e6h
|
||||
|
||||
|
||||
|
||||
END
|
|
@ -1,237 +0,0 @@
|
|||
DEFAULT REL
|
||||
|
||||
SECTION .text
|
||||
|
||||
global udiv128
|
||||
global recompiler_fres
|
||||
global recompiler_frsqrte
|
||||
|
||||
; udiv128: unsigned 128-bit by 64-bit division using a single DIV.
;   rcx = low 64 bits of the dividend (copied into rax below)
;   rdx = high 64 bits of the dividend (used as-is by DIV)
;   r8  = divisor
;   r9  = address that receives the 64-bit remainder
; Returns the quotient in rax.
; NOTE(review): the register usage matches the Microsoft x64 argument order
; (rcx, rdx, r8, r9) - presumably every caller uses that ABI; confirm.
; NOTE(review): DIV faults (#DE) if r8 is zero or the quotient does not fit
; in 64 bits; nothing here guards against that.
udiv128:
|
||||
; rax = low half, so that rdx:rax holds the full 128-bit dividend
mov rax, rcx
|
||||
; rdx:rax / r8 -> quotient in rax, remainder in rdx
div r8
|
||||
; store the remainder through the pointer in r9
mov [r9], rdx
|
||||
ret
|
||||
|
||||
; recompiler_fres: table-driven reciprocal estimate on the 64-bit double held
; in the low quadword of xmm15; the result is written back to xmm15.
; Presumably this reproduces the PowerPC (Espresso) fres instruction, judging
; by the label names - confirm against the caller.
;
; Register contract (visible in the code):
;   in/out : xmm15 (low 64 bits; movq/xorps also clear the upper 64 bits on
;            every path except the NaN path, which leaves xmm15 untouched)
;   saved  : rdx, rcx, rax, r8 are pushed on entry and popped on every exit
;   not saved: RFLAGS
;
; Cases, keyed on the 11-bit exponent field e (bits 52..62):
;   e == 0                     -> input bits OR 7FF0000000000000h
;   e == 7FFh, mantissa != 0   -> input returned unchanged (NaN)
;   e == 7FFh, mantissa == 0   -> +0 for positive input, 8000000000000000h
;                                 (-0) for negative input
;   otherwise                  -> sign | ((7FDh - e) << 52) + (m << 29), where
;                                 m = base - ((field * mult + 1) >> 1) comes
;                                 from asmFresLookupTable
recompiler_fres:
|
||||
; store all modified registers
|
||||
push rdx
|
||||
push rcx
|
||||
push rax
|
||||
push r8
|
||||
; r8 = table base, rdx = raw bits of the input double
lea r8,[asmFresLookupTable]
|
||||
movq rdx, xmm15
|
||||
; rcx = bits 47..51 (top five mantissa bits) = table row index
mov rcx,rdx
|
||||
shr rcx,2Fh
|
||||
mov rax,rdx
|
||||
and ecx,1Fh
|
||||
; eax = bits 37..46 (next ten mantissa bits)
shr rax,25h
|
||||
and eax,3FFh
|
||||
; eax = field * row multiplier (second dword of the row)
imul eax,dword [r8+rcx*8+4]
|
||||
; r8d = row base value (first dword of the row); the table pointer is no
; longer needed after this load
mov r8d,dword [r8+rcx*8]
|
||||
; rcx = input >> 52 (exponent field plus sign bit; masked further down)
mov rcx,rdx
|
||||
shr rcx,34h
|
||||
; eax = (product + 1) >> 1, i.e. half the product rounded up
inc eax
|
||||
shr eax,1
|
||||
; r8d = base - rounded half product = mantissa estimate
sub r8d,eax
|
||||
; ecx = exponent field; the AND sets ZF when the exponent is zero
and ecx,7FFh
|
||||
jne fres_espresso_label3
|
||||
; exponent field is zero: force the exponent to all ones, keeping the sign
; and mantissa bits of the input.
; NOTE(review): for a denormal input the mantissa stays non-zero, so the
; result has a NaN encoding rather than infinity - confirm this is intended.
mov rax,7FF0000000000000h
|
||||
or rdx,rax
|
||||
movq xmm15, rdx
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
; exponent field is non-zero
fres_espresso_label3:
|
||||
cmp ecx,7FFh
|
||||
jne fres_espresso_label4
|
||||
; exponent is all ones: non-zero mantissa means NaN -> return input as-is
mov rax,0FFFFFFFFFFFFFh
|
||||
test rax,rdx
|
||||
jne fres_espresso_label1
|
||||
; infinity: the sign bit decides between +0 and -0
test rdx,rdx
|
||||
jns fres_espresso_label2
|
||||
; negative infinity -> 8000000000000000h (-0)
mov rax,8000000000000000h
|
||||
movq xmm15, rax
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
; positive infinity -> +0 (whole register cleared)
fres_espresso_label2:
|
||||
xorps xmm15,xmm15
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
; ordinary exponent: assemble the result bits
fres_espresso_label4:
|
||||
; ecx = 7FDh - exponent (32-bit arithmetic, zero-extended into rcx).
; NOTE(review): for exponent 7FEh this wraps to 0FFFFFFFFh, and the shift
; below then leaves 0FFFh in bits 52..63 - confirm whether that input can
; reach this routine.
mov eax,7FDh
|
||||
sub eax,ecx
|
||||
mov ecx,eax
|
||||
; rdx = sign bit of the input only
mov rax,8000000000000000h
|
||||
and rdx,rax
|
||||
; move the new exponent into bits 52 and up
shl rcx,34h
|
||||
; rax = mantissa estimate, zero-extended from r8d
mov eax,r8d
|
||||
or rcx,rdx
|
||||
; align the estimate below the exponent, then ADD (not OR) so a carry out of
; the mantissa would propagate into the exponent
shl rax,1Dh
|
||||
add rcx,rax
|
||||
movq xmm15, rcx
|
||||
; common exit; also the direct target for NaN input (xmm15 left untouched)
fres_espresso_label1:
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
|
||||
; Lookup table consumed by recompiler_fres.
; Layout: 32 rows of two dwords, addressed as [table + index*8]:
;   +0  base value the rounded half product is subtracted from
;   +4  multiplier applied to the 10-bit field taken from input bits 37..46
; The 5-bit row index is input bits 47..51 (the top five mantissa bits).
; The table sits in the same section as the code, directly after the
; routine's final ret.
asmFresLookupTable:
|
||||
DD 07ff800h, 03e1h
|
||||
DD 0783800h, 03a7h
|
||||
DD 070ea00h, 0371h
|
||||
DD 06a0800h, 0340h
|
||||
DD 0638800h, 0313h
|
||||
DD 05d6200h, 02eah
|
||||
DD 0579000h, 02c4h
|
||||
DD 0520800h, 02a0h
|
||||
DD 04cc800h, 027fh
|
||||
DD 047ca00h, 0261h
|
||||
DD 0430800h, 0245h
|
||||
DD 03e8000h, 022ah
|
||||
DD 03a2c00h, 0212h
|
||||
DD 0360800h, 01fbh
|
||||
DD 0321400h, 01e5h
|
||||
DD 02e4a00h, 01d1h
|
||||
DD 02aa800h, 01beh
|
||||
DD 0272c00h, 01ach
|
||||
DD 023d600h, 019bh
|
||||
DD 0209e00h, 018bh
|
||||
DD 01d8800h, 017ch
|
||||
DD 01a9000h, 016eh
|
||||
DD 017ae00h, 015bh
|
||||
DD 014f800h, 015bh
|
||||
DD 0124400h, 0143h
|
||||
DD 0fbe00h, 0143h
|
||||
DD 0d3800h, 012dh
|
||||
DD 0ade00h, 012dh
|
||||
DD 088400h, 011ah
|
||||
DD 065000h, 011ah
|
||||
DD 041c00h, 0108h
|
||||
DD 020c00h, 0106h
|
||||
|
||||
; recompiler_frsqrte: table-driven reciprocal square root estimate on the
; 64-bit double held in the low quadword of xmm15; the result is written back
; to xmm15. Presumably this reproduces the PowerPC (Espresso) frsqrte
; instruction, judging by the label names - confirm against the caller.
;
; Register contract (visible in the code):
;   in/out : xmm15 (low 64 bits; movq/xorps also clear the upper 64 bits on
;            every path except the NaN path, which leaves xmm15 untouched)
;   saved  : rdx, rcx, rax, r8, r9 are pushed on entry and popped on every exit
;   not saved: RFLAGS
;
; Cases:
;   +/-0                         -> sign | 7FF0000000000000h
;   exponent 7FFh, mantissa != 0 -> input returned unchanged (NaN)
;   +infinity                    -> +0
;   -infinity, any other negative-> 7FF8000000000000h
;   other non-negative input     -> ((3FFh - ((e - 3FDh) >> 1)) << 52)
;                                   + (sign_extend(base - field*mult) << 26),
;                                   using asmFrsqrteLookupTable
recompiler_frsqrte:
|
||||
; store all modified registers
|
||||
push rdx
|
||||
push rcx
|
||||
push rax
|
||||
push r8
|
||||
push r9
|
||||
; r8 = raw bits of the input double
movq r8, xmm15
|
||||
; anything set besides the sign bit? if so the input is not +/-0
mov rax,7FFFFFFFFFFFFFFFh
|
||||
test rax,r8
|
||||
jne frsqrte_espresso_label1
|
||||
; input is +/-0: keep the sign, set the exponent to all ones
mov rax,0FFF0000000000000h
|
||||
and r8,rax
|
||||
mov rax,7FF0000000000000h
|
||||
or r8,rax
|
||||
movq xmm15, r8
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
; non-zero input
frsqrte_espresso_label1:
|
||||
; r9d = 11-bit exponent field (bits 52..62)
mov r9,r8
|
||||
shr r9,34h
|
||||
and r9d,7FFh
|
||||
cmp r9d,7FFh
|
||||
jne frsqrte_espresso_label2
|
||||
; exponent is all ones: non-zero mantissa means NaN -> return input as-is
mov rax,0FFFFFFFFFFFFFh
|
||||
test rax,r8
|
||||
jne frsqrte_espresso_label3
|
||||
; infinity: negative goes to the NaN path, positive yields +0
test r8,r8
|
||||
js frsqrte_espresso_label4
|
||||
xorps xmm15,xmm15
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
; finite, non-zero input: only non-negative values are estimated
frsqrte_espresso_label2:
|
||||
test r8,r8
|
||||
jns frsqrte_espresso_label5
|
||||
; negative input (finite or -infinity): result is 7FF8000000000000h
frsqrte_espresso_label4:
|
||||
mov rax,7FF8000000000000h
|
||||
movq xmm15, rax
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
; non-negative finite input: table-based estimate
; NOTE(review): inputs with a zero exponent field (denormals) also reach this
; path; (e - 3FDh) is then negative and is shifted as unsigned - confirm
; the intended behaviour for that range.
frsqrte_espresso_label5:
|
||||
lea rdx,[asmFrsqrteLookupTable]
|
||||
; rax = bits 48..52 (lowest exponent bit + top four mantissa bits) = row index
mov rax,r8
|
||||
shr rax,30h
|
||||
; rcx = bits 37..47 (11-bit field)
mov rcx,r8
|
||||
shr rcx,25h
|
||||
and eax,1Fh
|
||||
and ecx,7FFh
|
||||
; ecx = field * row multiplier (second dword of the row)
imul ecx,dword [rdx+rax*8+4]
|
||||
; eax = row base (first dword of the row) - product
mov eax,dword [rdx+rax*8]
|
||||
sub eax,ecx
|
||||
; ecx = (exponent - 3FDh) >> 1, computed as a 32-bit logical shift
lea ecx,[r9-3FDh]
|
||||
shr ecx,1
|
||||
; rdx = mantissa estimate, sign-extended to 64 bits
movsxd rdx,eax
|
||||
; eax = 3FFh - halved exponent offset = new exponent field
mov eax,3FFh
|
||||
sub eax,ecx
|
||||
; align the estimate below the exponent
shl rdx,1Ah
|
||||
mov ecx,eax
|
||||
; r8 = sign bit of the input only (always clear on this path)
mov rax,8000000000000000h
|
||||
and r8,rax
|
||||
shl rcx,34h
|
||||
or rcx,r8
|
||||
; ADD (not OR) so a carry/borrow from the estimate reaches the exponent
add rdx,rcx
|
||||
movq xmm15, rdx
|
||||
; common exit; also the direct target for NaN input (xmm15 left untouched)
frsqrte_espresso_label3:
|
||||
pop r9
|
||||
pop r8
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdx
|
||||
ret
|
||||
|
||||
; Lookup table consumed by recompiler_frsqrte.
; Layout: 32 rows of two dwords, addressed as [table + index*8]:
;   +0  value the scaled product is subtracted from
;   +4  multiplier applied to the 11-bit field taken from input bits 37..47
; The 5-bit row index is input bits 48..52 (shr 30h / and 1Fh), i.e. the
; lowest exponent bit followed by the top four mantissa bits.
asmFrsqrteLookupTable:
|
||||
; rows 0..15: index bit 4 (input bit 52, lowest exponent bit) clear
DD 01a7e800h, 0568h
|
||||
DD 017cb800h, 04f3h
|
||||
DD 01552800h, 048dh
|
||||
DD 0130c000h, 0435h
|
||||
DD 010f2000h, 03e7h
|
||||
DD 0eff000h, 03a2h
|
||||
DD 0d2e000h, 0365h
|
||||
DD 0b7c000h, 032eh
|
||||
DD 09e5000h, 02fch
|
||||
DD 0867000h, 02d0h
|
||||
DD 06ff000h, 02a8h
|
||||
DD 05ab800h, 0283h
|
||||
DD 046a000h, 0261h
|
||||
DD 0339800h, 0243h
|
||||
DD 0218800h, 0226h
|
||||
DD 0105800h, 020bh
|
||||
; rows 16..31: index bit 4 (input bit 52, lowest exponent bit) set
DD 03ffa000h, 07a4h
|
||||
DD 03c29000h, 0700h
|
||||
DD 038aa000h, 0670h
|
||||
DD 03572000h, 05f2h
|
||||
DD 03279000h, 0584h
|
||||
DD 02fb7000h, 0524h
|
||||
DD 02d26000h, 04cch
|
||||
DD 02ac0000h, 047eh
|
||||
DD 02881000h, 043ah
|
||||
DD 02665000h, 03fah
|
||||
DD 02468000h, 03c2h
|
||||
DD 02287000h, 038eh
|
||||
DD 020c1000h, 035eh
|
||||
DD 01f12000h, 0332h
|
||||
DD 01d79000h, 030ah
|
||||
DD 01bf4000h, 02e6h
|
Loading…
Add table
Add a link
Reference in a new issue