mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-05 14:31:17 +12:00
PPCRec: Remove redundant FPR instructions which are no longer used
This commit is contained in:
parent
bb5a7ce4ff
commit
32205a2081
7 changed files with 46 additions and 999 deletions
|
@ -609,7 +609,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
|
|||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -635,7 +635,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
|
|||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -894,7 +894,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
|
|||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -3,8 +3,6 @@
|
|||
#include "BackendX64.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
#include "asm/x64util.h" // for recompiler_fres / frsqrte
|
||||
|
||||
uint32 _regF64(IMLReg physReg);
|
||||
|
||||
uint32 _regI32(IMLReg r)
|
||||
|
@ -34,231 +32,6 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId)
|
|||
return (x86Assembler64::GPR8_REX)regId;
|
||||
}
|
||||
|
||||
// Emits code that multiplies both lanes of registerXMM by a scale factor that is
// looked up at runtime from the GQR value held in registerGQR.
//
// ppcImlGenContext: unused here
// x64GenContext:    code emitter state
// registerXMM:      XMM register that is scaled in place
// isLoad:           true -> use the scale field 16 bits higher in the GQR and the
//                   _psq_ld_* tables, false -> lower scale field and _psq_st_* tables
// scalePS1:         true -> use the *_ps0_ps1 table, false -> use the *_ps0_1 table
//                   (presumably the latter leaves PS1 multiplied by 1.0 - confirm
//                   against the table initialisation)
// registerGQR:      IML register holding the GQR value
//
// Clobbers REG_RESV_TEMP.
void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, IMLReg registerGQR)
{
	// Shift so that the 6-bit scale field ends up at bits 4..9, i.e. already
	// multiplied by 16 which is the byte stride of the 128-bit table entries.
	const int scaleFieldShift = (isLoad ? 16 : 0) + 8 - 4;
	const int scaleFieldMask = (0x3F << 4);

	// pick the lookup table inside PPCRecompilerInstanceData_t
	const size_t scaleTableOffset = isLoad
		? (scalePS1 ? offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_ps1)
					: offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_1))
		: (scalePS1 ? offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_ps1)
					: offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_1));

	// temp = GQR
	x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
	// temp = scale * 16
	x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, scaleFieldShift);
	x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, scaleFieldMask);
	// temp += base address of the recompiler instance data
	x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_RECDATA);
	// xmm *= table[scale]
	x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, scaleTableOffset);
}
|
||||
|
||||
// generate code for PSQ load for a particular type
|
||||
// if scaleGQR is -1 then a scale of 1.0 is assumed (no scale)
|
||||
void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID)
|
||||
{
|
||||
if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1)
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
// optimized code for ps float load
|
||||
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
|
||||
x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
|
||||
x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD
|
||||
x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP);
|
||||
x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, registerXMM, registerXMM);
|
||||
// note: floats are not scaled
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0)
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg);
|
||||
if (g_CPUFeatures.x86.movbe)
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (g_CPUFeatures.x86.movbe)
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP);
|
||||
x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
}
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP);
|
||||
// load constant 1.0 into lower half and upper half of temp register
|
||||
x64Gen_movddup_xmmReg_memReg64(x64GenContext, registerXMM, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1));
|
||||
// overwrite lower half with single from memory
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerXMM, REG_RESV_FPR_TEMP);
|
||||
// note: floats are not scaled
|
||||
}
|
||||
else
|
||||
{
|
||||
sint32 readSize;
|
||||
bool isSigned = false;
|
||||
if (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1)
|
||||
{
|
||||
readSize = 16;
|
||||
isSigned = true;
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1)
|
||||
{
|
||||
readSize = 16;
|
||||
isSigned = false;
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1)
|
||||
{
|
||||
readSize = 8;
|
||||
isSigned = true;
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1)
|
||||
{
|
||||
readSize = 8;
|
||||
isSigned = false;
|
||||
}
|
||||
else
|
||||
assert_dbg();
|
||||
|
||||
bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1);
|
||||
for (sint32 wordIndex = 0; wordIndex < 2; wordIndex++)
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
assert_dbg();
|
||||
}
|
||||
// read from memory
|
||||
if (wordIndex == 1 && loadPS1 == false)
|
||||
{
|
||||
// store constant 1
|
||||
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * 1, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32 memOffset = memImmS32 + wordIndex * (readSize / 8);
|
||||
if (readSize == 16)
|
||||
{
|
||||
// half word
|
||||
x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset);
|
||||
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // endian swap
|
||||
if (isSigned)
|
||||
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
else
|
||||
x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
}
|
||||
else if (readSize == 8)
|
||||
{
|
||||
// byte
|
||||
x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memOffset);
|
||||
if (isSigned)
|
||||
x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
else
|
||||
x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP);
|
||||
}
|
||||
// store
|
||||
x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * wordIndex, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
// convert the two integers to doubles
|
||||
x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext, registerXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR));
|
||||
// scale
|
||||
if (registerGQR.IsValid())
|
||||
PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, true, loadPS1, registerGQR);
|
||||
}
|
||||
}
|
||||
|
||||
// Generates code for a PSQ load whose data type is only known at runtime.
// The emitted code reads the 3-bit load type from the GQR value (bits 16..18),
// branches to one of five type-specific load sequences produced by
// PPCRecompilerX64Gen_imlInstr_psq_load() and then continues at a common end.
//
// mode selects between PS0+PS1 (PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) and PS0-only.
// All remaining parameters are forwarded unchanged to the type-specific generator.
//
// Layout of the emitted code:
//   dispatch (4x cmp/je) | float | u16 | s16 | u8 | s8 | end
// Every type value other than 4..7 falls through into the float sequence.
void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR)
{
	const bool pairLoad = (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1);

	// emits "cmp temp, typeId" + "je <unresolved>" and returns the position of the jump
	auto emitTypeBranch = [&](sint32 typeId) -> sint32
	{
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, typeId);
		const sint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
		return jumpPos;
	};
	// emits the load sequence for one concrete type, honouring pairLoad
	auto emitTypedLoad = [&](uint8 pairMode, uint8 singleMode)
	{
		const uint8 typedMode = pairLoad ? pairMode : singleMode;
		PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, typedMode, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
	};
	// emits an unresolved unconditional jump and returns its position
	auto emitExitJump = [&]() -> uint32
	{
		const uint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmp_imm32(x64GenContext, 0);
		return jumpPos;
	};

	// temp = (GQR >> 16) & 7 -> load type
	x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
	x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16);
	x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);

	// dispatch on the type
	const sint32 branchToU8 = emitTypeBranch(4);  // type 4 -> u8
	const sint32 branchToU16 = emitTypeBranch(5); // type 5 -> u16
	const sint32 branchToS8 = emitTypeBranch(6);  // type 6 -> s8
	const sint32 branchToS16 = emitTypeBranch(7); // type 7 -> s16
	// no match -> fall through into the float case

	// float
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0);
	const uint32 exitAfterFloat = emitExitJump();

	// u16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToU16, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_U16_PS0);
	const uint32 exitAfterU16 = emitExitJump();

	// s16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToS16, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_S16_PS0);
	const uint32 exitAfterS16 = emitExitJump();

	// u8
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToU8, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_U8_PS0);
	const uint32 exitAfterU8 = emitExitJump();

	// s8 (last case, runs straight into the end and needs no exit jump)
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToS8, x64GenContext->emitter->GetWriteIndex());
	emitTypedLoad(PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1, PPCREC_FPR_LD_MODE_PSQ_S8_PS0);

	// common end: resolve all exit jumps
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterFloat, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterU16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterS16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterU8, x64GenContext->emitter->GetWriteIndex());
}
|
||||
|
||||
// load from memory
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
|
||||
{
|
||||
|
@ -269,8 +42,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2);
|
||||
uint8 mode = imlInstruction->op_storeLoad.mode;
|
||||
|
||||
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0 ) // lazy hack for now. Load only one value for SINGLE_INTO_PS0
|
||||
if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0 )
|
||||
{
|
||||
// load byte swapped single into temporary FPR
|
||||
if( indexed )
|
||||
|
@ -362,25 +134,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 )
|
||||
{
|
||||
PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed);
|
||||
}
|
||||
else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR);
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
|
@ -388,188 +141,6 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
|||
return true;
|
||||
}
|
||||
|
||||
// Generates code for a PSQ (paired-single quantized) store of one specific, statically
// known data type.
//
// mode:          one of the PPCREC_FPR_ST_MODE_PSQ_* constants (float / S8 / U8 / S16 / U16,
//                each as PS0-only or PS0+PS1 variant)
// registerXMM:   source XMM register holding two doubles (PS0 in the low lane)
// memReg:        register holding the guest address
// memRegEx:      second address register, only used when 'indexed' is set
// memImmS32:     immediate address offset
// indexed:       address is memReg + memRegEx; only implemented for the FLOAT_PS0 mode
//                (memReg is temporarily modified and restored), every other mode hits
//                assert_dbg()
// registerGQR:   if valid, the value is first copied to REG_RESV_FPR_TEMP and, for the
//                integer modes, multiplied by the GQR store scale. If invalid (the
//                default) the source register is used as is.
//
// Integer modes truncate to integer, clamp to the range of the target type and write
// the result big-endian. Clobbers REG_RESV_TEMP and REG_RESV_FPR_TEMP.
void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR = IMLREG_INVALID)
{
	const bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1);
	const bool isFloat = mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1;
	if (registerGQR.IsValid())
	{
		// move to temporary xmm and update registerXMM so the source register stays untouched
		x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
		registerXMM = REG_RESV_FPR_TEMP;
		// apply scale (floats are stored unscaled)
		if (isFloat == false)
			PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, false, storePS1, registerGQR);
	}
	if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0)
	{
		// convert PS0 to single and move its bits to REG_RESV_TEMP
		x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
		x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
		// without MOVBE the byte swap has to happen before the store
		if (g_CPUFeatures.x86.movbe == false)
			x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
		if (indexed)
		{
			// temporarily fold memRegEx into memReg; undone after the store
			cemu_assert_debug(memReg != memRegEx);
			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
		}
		if (g_CPUFeatures.x86.movbe)
			x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP);
		else
			x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memImmS32, REG_RESV_TEMP);
		if (indexed)
		{
			// restore memReg
			x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
		}
		return;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1)
	{
		if (indexed)
			assert_dbg(); // todo
		// convert both lanes to single and write them with one big-endian 64-bit store
		x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
		x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
		x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD
		x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP);
		x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
		return;
	}
	// store as integer
	// get clamp limits and element width from mode
	sint32 clampMin = 0;
	sint32 clampMax = 0;
	sint32 bitWriteSize = 0;
	if (mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1)
	{
		clampMin = -128;
		clampMax = 127;
		bitWriteSize = 8;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1)
	{
		clampMin = 0;
		clampMax = 255;
		bitWriteSize = 8;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1)
	{
		clampMin = 0;
		clampMax = 0xFFFF;
		bitWriteSize = 16;
	}
	else if (mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 ||
		mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1)
	{
		clampMin = -32768;
		clampMax = 32767;
		bitWriteSize = 16;
	}
	else
	{
		// Unknown mode. Should cemu_assert() ever return, do not emit a store with
		// meaningless limits.
		cemu_assert(false);
		return;
	}
	for (sint32 valueIndex = 0; valueIndex < (storePS1 ? 2 : 1); valueIndex++)
	{
		// note: the GQR scale (if any) has already been applied above
		if (valueIndex == 0)
		{
			// convert low half (PS0) to integer
			x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, registerXMM);
		}
		else
		{
			// load top half (PS1) into bottom half of temporary register
			x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
			// convert low half to integer
			x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
		}
		// i = max(i, clampMin): skip the overwrite when i >= clampMin
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin);
		sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0);
		x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin);
		PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex());
		// i = min(i, clampMax): skip the overwrite when i <= clampMax
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax);
		sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0);
		x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax);
		PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
		// endian swap (not needed for single bytes)
		if (bitWriteSize == 16)
			x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
		// write to memory
		if (indexed)
			assert_dbg(); // unsupported
		sint32 memOffset = memImmS32 + valueIndex * (bitWriteSize / 8);
		if (bitWriteSize == 8)
			x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP);
		else if (bitWriteSize == 16)
			x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP);
	}
}
|
||||
|
||||
// Generates code for a PSQ store whose data type is only known at runtime.
// The emitted code reads the 3-bit store type from the GQR value (bits 0..2),
// branches to one of five type-specific store sequences produced by
// PPCRecompilerX64Gen_imlInstr_psq_store() and then continues at a common end.
//
// mode selects between PS0+PS1 (PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) and PS0-only.
// All remaining parameters are forwarded unchanged to the type-specific generator.
//
// Layout of the emitted code:
//   dispatch (4x cmp/je) | float | u16 | s16 | u8 | s8 | end
// Every type value other than 4..7 falls through into the float sequence.
void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, IMLReg registerGQR)
{
	const bool pairStore = (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1);

	// emits "cmp temp, typeId" + "je <unresolved>" and returns the position of the jump
	auto emitTypeBranch = [&](sint32 typeId) -> sint32
	{
		x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, typeId);
		const sint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0);
		return jumpPos;
	};
	// emits the store sequence for one concrete type, honouring pairStore
	auto emitTypedStore = [&](uint8 pairMode, uint8 singleMode)
	{
		const uint8 typedMode = pairStore ? pairMode : singleMode;
		PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, typedMode, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR);
	};
	// emits an unresolved unconditional jump and returns its position
	auto emitExitJump = [&]() -> uint32
	{
		const uint32 jumpPos = x64GenContext->emitter->GetWriteIndex();
		x64Gen_jmp_imm32(x64GenContext, 0);
		return jumpPos;
	};

	// temp = GQR & 7 -> store type
	x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, _regI32(registerGQR));
	x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7);

	// dispatch on the type
	const sint32 branchToU8 = emitTypeBranch(4);  // type 4 -> u8
	const sint32 branchToU16 = emitTypeBranch(5); // type 5 -> u16
	const sint32 branchToS8 = emitTypeBranch(6);  // type 6 -> s8
	const sint32 branchToS16 = emitTypeBranch(7); // type 7 -> s16
	// no match -> fall through into the float case

	// float
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0);
	const uint32 exitAfterFloat = emitExitJump();

	// u16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToU16, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_U16_PS0);
	const uint32 exitAfterU16 = emitExitJump();

	// s16
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToS16, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_S16_PS0);
	const uint32 exitAfterS16 = emitExitJump();

	// u8
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToU8, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_U8_PS0);
	const uint32 exitAfterU8 = emitExitJump();

	// s8 (last case, runs straight into the end and needs no exit jump)
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, branchToS8, x64GenContext->emitter->GetWriteIndex());
	emitTypedStore(PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1, PPCREC_FPR_ST_MODE_PSQ_S8_PS0);

	// common end: resolve all exit jumps
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterFloat, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterU16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterS16, x64GenContext->emitter->GetWriteIndex());
	PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, exitAfterU8, x64GenContext->emitter->GetWriteIndex());
}
|
||||
|
||||
// store to memory
|
||||
bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
|
||||
{
|
||||
|
@ -646,40 +217,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
|||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
else if(mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 )
|
||||
{
|
||||
cemu_assert_debug(imlInstruction->op_storeLoad.flags2.notExpanded == false);
|
||||
PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed);
|
||||
}
|
||||
else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 ||
|
||||
mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, imlInstruction->op_storeLoad.registerGQR);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( indexed )
|
||||
assert_dbg(); // todo
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Emits a shufpd of xmmReg with itself using selector 1.
// Assuming the helper encodes the x86 SHUFPD instruction, this exchanges the low
// (PS0) and high (PS1) double of the register.
void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg)
{
	// selector bit 0 set   -> result low lane comes from the high lane of the first operand
	// selector bit 1 clear -> result high lane comes from the low lane of the second operand
	constexpr int exchangeLanesSelector = 1;
	x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, xmmReg, xmmReg, exchangeLanesSelector);
}
|
||||
|
||||
// FPR op FPR
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
|
||||
{
|
||||
|
@ -701,93 +246,26 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
|
|||
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
|
||||
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);
|
||||
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP )
|
||||
{
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP )
|
||||
{
|
||||
// VPUNPCKHQDQ
|
||||
if (regR == regA)
|
||||
{
|
||||
// unpack top to bottom and top
|
||||
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
//else if ( hasAVXSupport )
|
||||
//{
|
||||
// // unpack top to bottom and top with non-destructive destination
|
||||
// // update: On Ivy Bridge this causes weird stalls?
|
||||
// x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, registerResult, registerOperand, registerOperand);
|
||||
//}
|
||||
else
|
||||
{
|
||||
// move top to bottom
|
||||
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
// duplicate bottom
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
}
|
||||
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM )
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM )
|
||||
{
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP )
|
||||
{
|
||||
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED )
|
||||
{
|
||||
if( regR != regA )
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
_swapPS0PS1(x64GenContext, regR);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP )
|
||||
{
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 2);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM )
|
||||
{
|
||||
// use unpckhpd here?
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regA, 3);
|
||||
_swapPS0PS1(x64GenContext, regR);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM )
|
||||
{
|
||||
x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR )
|
||||
{
|
||||
x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM )
|
||||
{
|
||||
x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR)
|
||||
{
|
||||
x64Gen_divpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM )
|
||||
{
|
||||
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR )
|
||||
{
|
||||
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR )
|
||||
{
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
|
||||
{
|
||||
x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ )
|
||||
{
|
||||
x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA);
|
||||
|
@ -795,58 +273,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
|
|||
// move to FPR register
|
||||
x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT)
|
||||
{
|
||||
// move register to XMM15
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
|
||||
// call assembly routine to calculate accurate FRSQRTE result in XMM15
|
||||
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte);
|
||||
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP);
|
||||
|
||||
// copy result to bottom of result register
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR )
|
||||
{
|
||||
// copy register
|
||||
if( regR != regA )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
// toggle sign bits
|
||||
x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR )
|
||||
{
|
||||
// copy register
|
||||
if( regR != regA )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
}
|
||||
// set sign bit to 0
|
||||
x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair));
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR)
|
||||
{
|
||||
// calculate bottom half of result
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR)
|
||||
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres);
|
||||
else
|
||||
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte);
|
||||
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
|
||||
// calculate top half of result
|
||||
// todo - this top to bottom copy can be optimized?
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, regA, 3);
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom
|
||||
|
||||
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15
|
||||
|
||||
x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); // copy bottom to top
|
||||
}
|
||||
else
|
||||
{
|
||||
assert_dbg();
|
||||
|
@ -895,29 +321,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
|
|||
x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
}
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR)
|
||||
{
|
||||
// registerResult = registerOperandA - registerOperandB
|
||||
if( regR == regA )
|
||||
{
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
}
|
||||
else if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, regR, regA, regB);
|
||||
}
|
||||
else if( regR == regB )
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA);
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
}
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM )
|
||||
{
|
||||
if( regR == regA )
|
||||
|
@ -950,39 +353,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
|
|||
uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB);
|
||||
uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC);
|
||||
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 )
|
||||
{
|
||||
// todo: Investigate if there are other optimizations possible if the operand registers overlap
|
||||
// generic case
|
||||
// 1) move frA bottom to frTemp bottom and top
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
// 2) add frB (both halfs, lower half is overwritten in the next step)
|
||||
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
|
||||
// 3) Interleave top of frTemp and frC
|
||||
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC);
|
||||
// todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 )
|
||||
{
|
||||
// todo: Investigate if there are other optimizations possible if the operand registers overlap
|
||||
// 1) move frA bottom to frTemp bottom and top
|
||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
||||
// 2) add frB (both halfs, lower half is overwritten in the next step)
|
||||
x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB);
|
||||
// 3) Copy bottom from frC
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regC);
|
||||
//// 4) Swap bottom and top half
|
||||
//x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1);
|
||||
// todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers
|
||||
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
||||
|
||||
//float s0 = (float)hCPU->fpr[frC].fp0;
|
||||
//float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1);
|
||||
//hCPU->fpr[frD].fp0 = s0;
|
||||
//hCPU->fpr[frD].fp1 = s1;
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM )
|
||||
if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM )
|
||||
{
|
||||
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
|
||||
sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex();
|
||||
|
@ -997,38 +368,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc
|
|||
// end
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex());
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR )
|
||||
{
|
||||
// select bottom
|
||||
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
|
||||
sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
|
||||
// select C bottom
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC);
|
||||
sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
|
||||
// select B bottom
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex());
|
||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB);
|
||||
// end
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex());
|
||||
// select top
|
||||
x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); // copy top to bottom (todo: May cause stall?)
|
||||
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0));
|
||||
sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0);
|
||||
// select C top
|
||||
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandC);
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regC, 2);
|
||||
sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex();
|
||||
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0);
|
||||
// select B top
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex());
|
||||
//x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerResult, registerOperandB);
|
||||
x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, regR, regB, 2);
|
||||
// end
|
||||
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex());
|
||||
}
|
||||
else
|
||||
assert_dbg();
|
||||
}
|
||||
|
@ -1060,13 +399,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction,
|
|||
// convert back to 64bit double
|
||||
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR )
|
||||
{
|
||||
// convert to 32bit singles
|
||||
x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
// convert back to 64bit doubles
|
||||
x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, regR, regR);
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64)
|
||||
{
|
||||
// convert bottom to 64bit double
|
||||
|
|
|
@ -229,29 +229,11 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
// determine partially written result
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR2 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
|
||||
// PS1 remains the same
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
registersUsed->readGPR2 = op_storeLoad.registerData;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0:
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
|
@ -269,28 +251,11 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
// determine partially written result
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR3 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0:
|
||||
// PS1 remains the same
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
registersUsed->readGPR3 = op_storeLoad.registerData;
|
||||
break;
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0:
|
||||
case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U16_PS0:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1:
|
||||
case PPCREC_FPR_LD_MODE_PSQ_U8_PS0:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
|
@ -302,18 +267,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR1 = op_storeLoad.registerData;
|
||||
if (op_storeLoad.registerMem.IsValid())
|
||||
registersUsed->readGPR2 = op_storeLoad.registerMem;
|
||||
// PSQ generic stores also access GQR
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR3 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
default:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
|
||||
{
|
||||
|
@ -324,43 +277,14 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->readGPR2 = op_storeLoad.registerMem;
|
||||
if (op_storeLoad.registerMem2.IsValid())
|
||||
registersUsed->readGPR3 = op_storeLoad.registerMem2;
|
||||
// PSQ generic stores also access GQR
|
||||
switch (op_storeLoad.mode)
|
||||
{
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0:
|
||||
case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsValid());
|
||||
registersUsed->readGPR4 = op_storeLoad.registerGQR;
|
||||
break;
|
||||
default:
|
||||
cemu_assert_debug(op_storeLoad.registerGQR.IsInvalid());
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_R_R)
|
||||
{
|
||||
// fpr operation
|
||||
if (operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ||
|
||||
operation == PPCREC_IML_OP_ASSIGN ||
|
||||
operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_ABS_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_FRES_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR)
|
||||
{
|
||||
// operand read, result written
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
}
|
||||
else if (
|
||||
if (
|
||||
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 ||
|
||||
operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ||
|
||||
operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT
|
||||
operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ
|
||||
)
|
||||
{
|
||||
// operand read, result read and (partially) written
|
||||
|
@ -369,12 +293,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_ADD_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_SUB_PAIR ||
|
||||
operation == PPCREC_IML_OP_FPR_SUB_BOTTOM)
|
||||
{
|
||||
// operand read, result read and written
|
||||
|
@ -383,14 +303,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
|
||||
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_FCMPU_TOP ||
|
||||
operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM)
|
||||
{
|
||||
// operand read, result read
|
||||
registersUsed->readGPR1 = op_fpr_r_r.regA;
|
||||
registersUsed->readGPR2 = op_fpr_r_r.regR;
|
||||
}
|
||||
else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
|
||||
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
|
||||
{
|
||||
|
@ -414,8 +326,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
case PPCREC_IML_OP_FPR_SUB_BOTTOM:
|
||||
registersUsed->readGPR3 = op_fpr_r_r_r.regR;
|
||||
break;
|
||||
case PPCREC_IML_OP_FPR_SUB_PAIR:
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
@ -433,10 +343,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
case PPCREC_IML_OP_FPR_SELECT_BOTTOM:
|
||||
registersUsed->readGPR4 = op_fpr_r_r_r_r.regR;
|
||||
break;
|
||||
case PPCREC_IML_OP_FPR_SUM0:
|
||||
case PPCREC_IML_OP_FPR_SUM1:
|
||||
case PPCREC_IML_OP_FPR_SELECT_PAIR:
|
||||
break;
|
||||
default:
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
@ -448,8 +354,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 ||
|
||||
operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ||
|
||||
operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR)
|
||||
operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM)
|
||||
{
|
||||
registersUsed->readGPR1 = op_fpr_r.regR;
|
||||
registersUsed->writtenGPR1 = op_fpr_r.regR;
|
||||
|
@ -620,27 +525,23 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& tr
|
|||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
|
||||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_STORE)
|
||||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
|
||||
{
|
||||
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
|
||||
op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable);
|
||||
op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable);
|
||||
op_storeLoad.registerGQR = replaceRegisterIdMultiple(op_storeLoad.registerGQR, translationTable);
|
||||
}
|
||||
else if (type == PPCREC_IML_TYPE_FPR_R)
|
||||
{
|
||||
|
|
|
@ -126,46 +126,22 @@ enum
|
|||
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
|
||||
PPCREC_IML_OP_CNTLZW,
|
||||
// FPU
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
|
||||
PPCREC_IML_OP_FPR_ADD_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_ADD_PAIR,
|
||||
PPCREC_IML_OP_FPR_SUB_PAIR,
|
||||
PPCREC_IML_OP_FPR_SUB_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_MULTIPLY_PAIR,
|
||||
PPCREC_IML_OP_FPR_DIVIDE_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_DIVIDE_PAIR,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP,
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
|
||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
|
||||
PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
|
||||
PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated
|
||||
PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated
|
||||
PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated
|
||||
PPCREC_IML_OP_FPR_NEGATE_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_NEGATE_PAIR,
|
||||
PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0)
|
||||
PPCREC_IML_OP_FPR_ABS_PAIR,
|
||||
PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy)
|
||||
PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy)
|
||||
PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0)
|
||||
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register)
|
||||
PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision
|
||||
PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT,
|
||||
PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ,
|
||||
PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A
|
||||
PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A
|
||||
// Conversion (FPR_R_R)
|
||||
PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
|
||||
PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
|
||||
// PS
|
||||
PPCREC_IML_OP_FPR_SUM0,
|
||||
PPCREC_IML_OP_FPR_SUM1,
|
||||
|
||||
PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register
|
||||
|
||||
// R_R_R only
|
||||
|
||||
|
@ -297,38 +273,13 @@ enum
|
|||
{
|
||||
// fpr load
|
||||
PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0,
|
||||
PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S16_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U16_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S8_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U8_PS0,
|
||||
PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1,
|
||||
|
||||
// fpr store
|
||||
PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0
|
||||
PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0
|
||||
|
||||
PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0
|
||||
|
||||
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S8_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U8_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U16_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S16_PS0,
|
||||
PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1,
|
||||
};
|
||||
|
||||
struct IMLUsedRegisters
|
||||
|
@ -468,7 +419,6 @@ struct IMLInstruction
|
|||
IMLReg registerData;
|
||||
IMLReg registerMem;
|
||||
IMLReg registerMem2;
|
||||
IMLReg registerGQR;
|
||||
uint8 copyWidth;
|
||||
struct
|
||||
{
|
||||
|
@ -476,7 +426,7 @@ struct IMLInstruction
|
|||
bool signExtend : 1;
|
||||
bool notExpanded : 1; // for floats
|
||||
}flags2;
|
||||
uint8 mode; // transfer mode (copy width, ps0/ps1 behavior)
|
||||
uint8 mode; // transfer mode
|
||||
sint32 immS32;
|
||||
}op_storeLoad;
|
||||
struct
|
||||
|
@ -760,58 +710,48 @@ struct IMLInstruction
|
|||
// FPR
|
||||
|
||||
// load from memory
|
||||
void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
||||
void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
|
||||
{
|
||||
if (registerGQR.IsValid())
|
||||
{
|
||||
if ( mode == 0)
|
||||
__debugbreak();
|
||||
}
|
||||
|
||||
this->type = PPCREC_IML_TYPE_FPR_LOAD;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerDestination;
|
||||
this->op_storeLoad.registerMem = registerMemory;
|
||||
this->op_storeLoad.registerGQR = registerGQR;
|
||||
this->op_storeLoad.immS32 = immS32;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
}
|
||||
|
||||
void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
||||
void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerDestination;
|
||||
this->op_storeLoad.registerMem = registerMemory1;
|
||||
this->op_storeLoad.registerMem2 = registerMemory2;
|
||||
this->op_storeLoad.registerGQR = registerGQR;
|
||||
this->op_storeLoad.immS32 = 0;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
}
|
||||
|
||||
// store to memory
|
||||
void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
||||
void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_STORE;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerSource;
|
||||
this->op_storeLoad.registerMem = registerMemory;
|
||||
this->op_storeLoad.registerGQR = registerGQR;
|
||||
this->op_storeLoad.immS32 = immS32;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
}
|
||||
|
||||
void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
||||
void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian)
|
||||
{
|
||||
this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED;
|
||||
this->operation = 0;
|
||||
this->op_storeLoad.registerData = registerSource;
|
||||
this->op_storeLoad.registerMem = registerMemory1;
|
||||
this->op_storeLoad.registerMem2 = registerMemory2;
|
||||
this->op_storeLoad.registerGQR = registerGQR;
|
||||
this->op_storeLoad.immS32 = immS32;
|
||||
this->op_storeLoad.mode = mode;
|
||||
this->op_storeLoad.flags2.swapEndian = switchEndian;
|
||||
|
|
|
@ -90,21 +90,23 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
|||
*/
|
||||
void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
for (sint32 i = 0; i < segIt->imlList.size(); i++)
|
||||
{
|
||||
IMLInstruction* imlInstruction = segIt->imlList.data() + i;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
}
|
||||
}
|
||||
cemuLog_logDebugOnce(LogType::Force, "IMLOptimizer_OptimizeDirectFloatCopies(): Currently disabled\n");
|
||||
return;
|
||||
// for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
// {
|
||||
// for (sint32 i = 0; i < segIt->imlList.size(); i++)
|
||||
// {
|
||||
// IMLInstruction* imlInstruction = segIt->imlList.data() + i;
|
||||
// if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
// {
|
||||
// PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
// }
|
||||
// else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
// {
|
||||
// PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
|
||||
|
@ -224,118 +226,6 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If value of GQR can be predicted for a given PSQ load or store instruction then replace it with an optimized version
|
||||
*/
|
||||
void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
for(IMLInstruction& instIt : segIt->imlList)
|
||||
{
|
||||
if (instIt.type == PPCREC_IML_TYPE_FPR_LOAD || instIt.type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
|
||||
{
|
||||
if(instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 &&
|
||||
instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 )
|
||||
continue;
|
||||
// get GQR value
|
||||
cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid());
|
||||
sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR);
|
||||
cemu_assert(gqrIndex >= 0);
|
||||
if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex])
|
||||
continue;
|
||||
uint32 gqrValue;
|
||||
if (!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue))
|
||||
continue;
|
||||
|
||||
uint32 formatType = (gqrValue >> 16) & 7;
|
||||
uint32 scale = (gqrValue >> 24) & 0x3F;
|
||||
if (scale != 0)
|
||||
continue; // only generic handler supports scale
|
||||
if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
}
|
||||
else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED)
|
||||
{
|
||||
if(instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0 &&
|
||||
instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
continue;
|
||||
// get GQR value
|
||||
cemu_assert_debug(instIt.op_storeLoad.registerGQR.IsValid());
|
||||
sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, instIt.op_storeLoad.registerGQR);
|
||||
cemu_assert(gqrIndex >= 0 && gqrIndex < 8);
|
||||
if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex])
|
||||
continue;
|
||||
uint32 gqrValue;
|
||||
if(!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue))
|
||||
continue;
|
||||
uint32 formatType = (gqrValue >> 16) & 7;
|
||||
uint32 scale = (gqrValue >> 24) & 0x3F;
|
||||
if (scale != 0)
|
||||
continue; // only generic handler supports scale
|
||||
if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
{
|
||||
if (formatType == 0)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1;
|
||||
else if (formatType == 4)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1;
|
||||
else if (formatType == 5)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1;
|
||||
else if (formatType == 6)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1;
|
||||
else if (formatType == 7)
|
||||
instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1;
|
||||
if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1)
|
||||
instIt.op_storeLoad.registerGQR = IMLREG_INVALID;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// analyses register dependencies across the entire function
|
||||
// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten)
|
||||
class IMLOptimizerRegIOAnalysis
|
||||
|
|
|
@ -311,10 +311,7 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
|
|||
// this simplifies logic during register allocation
|
||||
PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);
|
||||
|
||||
// if GQRs can be predicted, optimize PSQ load/stores
|
||||
PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext);
|
||||
|
||||
// merge certain float load+store patterns (must happen before FPR register remapping)
|
||||
// merge certain float load+store patterns
|
||||
IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext);
|
||||
// delay byte swapping for certain load+store patterns
|
||||
IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext);
|
||||
|
|
|
@ -12,9 +12,7 @@ IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit)
|
|||
|
||||
#define DefinePS0(name, regIndex) IMLReg name = _GetFPRRegPS0(ppcImlGenContext, regIndex);
|
||||
#define DefinePS1(name, regIndex) IMLReg name = _GetFPRRegPS1(ppcImlGenContext, regIndex);
|
||||
|
||||
#define DefinePSX(name, regIndex, isPS1) IMLReg name = isPS1 ? _GetFPRRegPS1(ppcImlGenContext, regIndex) : _GetFPRRegPS0(ppcImlGenContext, regIndex);
|
||||
|
||||
#define DefineTempFPR(name, index) IMLReg name = _GetFPRTemp(ppcImlGenContext, index);
|
||||
|
||||
IMLReg _GetFPRRegPS0(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex)
|
||||
|
@ -51,17 +49,6 @@ void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext
|
|||
assert_dbg();
|
||||
}
|
||||
|
||||
/*
|
||||
* Rounds pair of doubles to single precision (if single precision accuracy is emulated)
|
||||
*/
|
||||
void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false)
|
||||
{
|
||||
// Legacy helper: the assert below flags any caller that still reaches this path.
cemu_assert_suspicious(); // should not be used any longer
|
||||
// Emit one IML instruction applying the pair round-to-single-precision op to fprRegister.
ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, fprRegister);
|
||||
// flushDenormals is not implemented here: requesting it only trips assert_dbg(),
// no additional instruction is emitted for it.
if( flushDenormals )
|
||||
assert_dbg();
|
||||
}
|
||||
|
||||
bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble)
|
||||
{
|
||||
sint32 rA, frD;
|
||||
|
@ -250,7 +237,7 @@ bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
DefinePS0(fprD, frD);
|
||||
// move frA to frD (if different register)
|
||||
if( frD != frA )
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA);
|
||||
// multiply bottom double of frD with bottom double of frC
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC);
|
||||
return true;
|
||||
|
@ -268,7 +255,7 @@ bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
{
|
||||
DefineTempFPR(fprTemp, 0);
|
||||
// move frA to temporary register
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA);
|
||||
// divide bottom double of temporary register by bottom double of frB
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp, fprB);
|
||||
// move result to frD
|
||||
|
@ -296,7 +283,7 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
{
|
||||
DefineTempFPR(fprTemp, 0);
|
||||
// move frA to temporary register
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA);
|
||||
// multiply bottom double of temporary register with bottom double of frC
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp, fprC);
|
||||
// add result to frD
|
||||
|
@ -313,7 +300,7 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
}
|
||||
// move frA to frD (if different register)
|
||||
if( frD != frA )
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA); // always copy ps0 and ps1
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA); // always copy ps0 and ps1
|
||||
// multiply bottom double of frD with bottom double of frC
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC);
|
||||
// add frB
|
||||
|
@ -333,10 +320,10 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
{
|
||||
// if frB is already in frD we need a temporary register to store the product of frA*frC
|
||||
DefineTempFPR(fprTemp, 0);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprTemp, fprC);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB_BOTTOM, fprTemp, fprB);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprTemp);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprTemp);
|
||||
return false;
|
||||
}
|
||||
if( frD == frC )
|
||||
|
@ -348,7 +335,7 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
}
|
||||
// move frA to frD
|
||||
if( frD != frA )
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA);
|
||||
// multiply bottom double of frD with bottom double of frC
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC);
|
||||
// sub frB
|
||||
|
@ -423,7 +410,7 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
DefinePS0(fprD, frD);
|
||||
// move frA to frD (if different register)
|
||||
if( frD != frA )
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA);
|
||||
// multiply bottom double of frD with bottom double of frC
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprD, fprC);
|
||||
// adjust accuracy
|
||||
|
@ -445,7 +432,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
{
|
||||
DefineTempFPR(fprTemp, 0);
|
||||
// move frA to temporary register
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprTemp, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprTemp, fprA);
|
||||
// divide bottom double of temporary register by bottom double of frB
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprTemp, fprB);
|
||||
// move result to frD
|
||||
|
@ -457,7 +444,7 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
}
|
||||
// move frA to frD (if different register)
|
||||
if( frD != frA )
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA);
|
||||
// divide bottom double of frD by bottom double of frB
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprD, fprB);
|
||||
// adjust accuracy
|
||||
|
@ -483,7 +470,7 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
|||
DefinePS0(fprD, frD);
|
||||
// move frA to frD (if different register)
|
||||
if( frD != frA )
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_ASSIGN, fprD, fprA);
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprD, fprA);
|
||||
// add bottom double of frD and bottom double of frB
|
||||
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD_BOTTOM, fprD, fprB);
|
||||
// adjust accuracy
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue