mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-12 17:58:37 +12:00
spu: Recompiler interrupt optimizations - Pigeonhole-optimize the branching pattern that is used to enable and disable interrupts in code; this should lower the number of blocks that are compiled and avoid falling out of a block - Recompiled interrupt check in some cases, to stay in the block instead of falling out to the dispatcher
This commit is contained in:
parent
ad97780c4f
commit
8b476b5bfa
3 changed files with 54 additions and 4 deletions
|
@ -288,6 +288,31 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
|
||||||
return XmmConst(v128::fromV(data));
|
return XmmConst(v128::fromV(data));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the code that applies the interrupt flags (op.d / op.e) of an indirect
// branch opcode. It is called from BI and IRET after the branch target has been
// loaded into *addr and before the jump through the jump table.
//
//  - op.d set: emits an atomic clear of bit 0 of interrupts_enabled.
//  - op.e set (and op.d clear): emits an atomic set of bit 0 of
//    interrupts_enabled, then an inline test of
//    ch_event_stat & ch_event_mask & SPU_EVENT_INTR_TEST. If the result is
//    non-zero, the emitted code clears the enable bit again, stores *addr to
//    srr0, sets pc to 0 and goes through FunctionCall(); afterwards *addr is
//    reloaded from srr0.
//  - neither flag set: nothing is emitted.
//
// NOTE(review): pc = 0 is presumably the interrupt handler entry point and
// FunctionCall() presumably runs it — confirm against FunctionCall().
void spu_recompiler::CheckInterruptStatus(spu_opcode_t op)
|
||||||
|
{
|
||||||
|
if (op.d)
|
||||||
|
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0); // lock btr: atomically reset bit 0
|
||||||
|
else if (op.e) {
|
||||||
|
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0); // lock bts: atomically set bit 0
|
||||||
|
c->mov(*qw0, SPU_OFF_32(ch_event_stat)); // qw0 = ch_event_stat
|
||||||
|
c->and_(*qw0, SPU_OFF_32(ch_event_mask)); // keep only the bits also present in ch_event_mask
|
||||||
|
c->and_(*qw0, SPU_EVENT_INTR_TEST); // keep only the bits selected by SPU_EVENT_INTR_TEST
|
||||||
|
c->cmp(*qw0, 0);
|
||||||
|
|
||||||
|
asmjit::Label noInterrupt = c->newLabel();
|
||||||
|
c->je(noInterrupt); // no bits left after masking: skip the interrupt path
|
||||||
|
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0); // reset the enable bit again before taking the interrupt path
|
||||||
|
c->mov(SPU_OFF_32(srr0), *addr); // save the pending branch target in srr0
|
||||||
|
c->mov(SPU_OFF_32(pc), 0); // presumably the interrupt handler address — confirm
|
||||||
|
|
||||||
|
FunctionCall();
|
||||||
|
|
||||||
|
c->mov(*addr, SPU_OFF_32(srr0)); // reload the branch target from srr0 after the call
|
||||||
|
c->bind(noInterrupt);
|
||||||
|
c->unuse(*qw0); // qw0 was only needed for the masking test above
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void spu_recompiler::InterpreterCall(spu_opcode_t op)
|
void spu_recompiler::InterpreterCall(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32
|
auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32
|
||||||
|
@ -1013,7 +1038,7 @@ void spu_recompiler::BI(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||||
c->and_(*addr, 0x3fffc);
|
c->and_(*addr, 0x3fffc);
|
||||||
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
|
CheckInterruptStatus(op);
|
||||||
c->jmp(*jt);
|
c->jmp(*jt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1037,7 +1062,7 @@ void spu_recompiler::IRET(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
c->mov(*addr, SPU_OFF_32(srr0));
|
c->mov(*addr, SPU_OFF_32(srr0));
|
||||||
c->and_(*addr, 0x3fffc);
|
c->and_(*addr, 0x3fffc);
|
||||||
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
|
CheckInterruptStatus(op);
|
||||||
c->jmp(*jt);
|
c->jmp(*jt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,7 @@ private:
|
||||||
asmjit::X86Mem XmmConst(__m128i data);
|
asmjit::X86Mem XmmConst(__m128i data);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
void CheckInterruptStatus(spu_opcode_t op);
|
||||||
void InterpreterCall(spu_opcode_t op);
|
void InterpreterCall(spu_opcode_t op);
|
||||||
void FunctionCall();
|
void FunctionCall();
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||||
// Minimal position of ila $SP,* instruction
|
// Minimal position of ila $SP,* instruction
|
||||||
u32 ila_sp_pos = max_limit;
|
u32 ila_sp_pos = max_limit;
|
||||||
|
|
||||||
|
// pigeonhole optimization, addr of last ila r2, addr, or 0 if last instruction was not
|
||||||
|
u32 ila_r2_addr = 0;
|
||||||
|
|
||||||
// Find preliminary set of possible block entries (first pass), `start` is the current block address
|
// Find preliminary set of possible block entries (first pass), `start` is the current block address
|
||||||
for (u32 start = entry, pos = entry; pos < limit; pos += 4)
|
for (u32 start = entry, pos = entry; pos < limit; pos += 4)
|
||||||
{
|
{
|
||||||
|
@ -174,10 +177,18 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if upcoming instruction is not BI, reset the pigeonhole optimization
|
||||||
|
// todo: can constant propagation somewhere get rid of this check?
|
||||||
|
if ((type != BI))
|
||||||
|
ila_r2_addr = 0; // reset
|
||||||
|
|
||||||
if (type == BI || type == IRET) // Branch Indirect
|
if (type == BI || type == IRET) // Branch Indirect
|
||||||
{
|
{
|
||||||
blocks.emplace(start);
|
blocks.emplace(start);
|
||||||
start = pos + 4;
|
start = pos + 4;
|
||||||
|
|
||||||
|
if (op.ra == 2 && ila_r2_addr > entry)
|
||||||
|
blocks.emplace(ila_r2_addr);
|
||||||
}
|
}
|
||||||
else if (type == BR || type == BRA) // Branch Relative/Absolute
|
else if (type == BR || type == BRA) // Branch Relative/Absolute
|
||||||
{
|
{
|
||||||
|
@ -233,6 +244,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||||
blocks.emplace(target);
|
blocks.emplace(target);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (type == LNOP || type == NOP) {
|
||||||
|
// there's a chance that there are some random lnops/nops after the end of a function
|
||||||
|
// haven't found a definite pattern, but it is an easy optimization to check for: just push start down if an lnop/nop is tagged as a start
|
||||||
|
// todo: remove the last added start pos as it's probably unnecessary
|
||||||
|
if (pos == start)
|
||||||
|
start = pos + 4;
|
||||||
|
}
|
||||||
else // Other instructions (writing rt reg)
|
else // Other instructions (writing rt reg)
|
||||||
{
|
{
|
||||||
const u32 rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
|
const u32 rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
|
||||||
|
@ -241,9 +259,8 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||||
if (rt == 0)
|
if (rt == 0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
// Analyse stack pointer access
|
// Analyse stack pointer access
|
||||||
if (rt == 1)
|
else if (rt == 1)
|
||||||
{
|
{
|
||||||
if (type == ILA && pos < ila_sp_pos)
|
if (type == ILA && pos < ila_sp_pos)
|
||||||
{
|
{
|
||||||
|
@ -251,6 +268,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||||
ila_sp_pos = pos;
|
ila_sp_pos = pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// pigeonhole optimize
|
||||||
|
// ila r2, addr
|
||||||
|
// bi r2
|
||||||
|
else if (rt == 2) {
|
||||||
|
if (type == ILA)
|
||||||
|
ila_r2_addr = spu_branch_target(op.i18);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue