spu: Recompiler interrupt optimizations - Pigeonhole-optimize the branching pattern used to enable and disable interrupts in code; this should lower the number of blocks that are compiled and avoid falling out of a block - Recompiled interrupt check in some cases to stay in the block instead of falling out to the dispatcher

This commit is contained in:
Jake 2017-11-30 20:50:01 -06:00 committed by kd-11
parent ad97780c4f
commit 8b476b5bfa
3 changed files with 54 additions and 4 deletions

View file

@ -288,6 +288,31 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
return XmmConst(v128::fromV(data)); return XmmConst(v128::fromV(data));
} }
// Emits code handling the D (disable) / E (enable) interrupt bits of an
// indirect-branch opcode, so the recompiled block can stay inline instead of
// falling out to the dispatcher.
// - op.d: atomically clear the interrupts_enabled flag (lock btr).
// - op.e: atomically set the flag, then test whether an unmasked
//   interrupt-class event is already pending; if so, disable interrupts
//   again, save the branch target into srr0 and invoke the handler at
//   local-storage address 0, resuming afterwards at whatever srr0 holds.
// NOTE(review): assumes *addr already contains the branch target when this
// is called (see the BI/IRET emitters, which compute it first) — confirm
// for any new call sites.
void spu_recompiler::CheckInterruptStatus(spu_opcode_t op)
{
if (op.d)
// lock prefix: flag may be observed/modified from other threads
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
else if (op.e) {
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
// qw0 = ch_event_stat & ch_event_mask & SPU_EVENT_INTR_TEST
// (nonzero iff a pending event is both raised and unmasked as an interrupt)
c->mov(*qw0, SPU_OFF_32(ch_event_stat));
c->and_(*qw0, SPU_OFF_32(ch_event_mask));
c->and_(*qw0, SPU_EVENT_INTR_TEST);
c->cmp(*qw0, 0);
asmjit::Label noInterrupt = c->newLabel();
c->je(noInterrupt);
// Pending interrupt: interrupts go off, the would-be branch target is
// saved in srr0, and execution is redirected to the handler at pc = 0.
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
c->mov(SPU_OFF_32(srr0), *addr);
c->mov(SPU_OFF_32(pc), 0);
FunctionCall();
// After the handler returns, continue at the address left in srr0
// (the handler's IRET target).
c->mov(*addr, SPU_OFF_32(srr0));
c->bind(noInterrupt);
// Release the scratch register back to the register allocator.
c->unuse(*qw0);
}
}
void spu_recompiler::InterpreterCall(spu_opcode_t op) void spu_recompiler::InterpreterCall(spu_opcode_t op)
{ {
auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32 auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32
@ -1013,7 +1038,7 @@ void spu_recompiler::BI(spu_opcode_t op)
{ {
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc); c->and_(*addr, 0x3fffc);
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table CheckInterruptStatus(op);
c->jmp(*jt); c->jmp(*jt);
} }
@ -1037,7 +1062,7 @@ void spu_recompiler::IRET(spu_opcode_t op)
{ {
c->mov(*addr, SPU_OFF_32(srr0)); c->mov(*addr, SPU_OFF_32(srr0));
c->and_(*addr, 0x3fffc); c->and_(*addr, 0x3fffc);
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table CheckInterruptStatus(op);
c->jmp(*jt); c->jmp(*jt);
} }

View file

@ -78,6 +78,7 @@ private:
asmjit::X86Mem XmmConst(__m128i data); asmjit::X86Mem XmmConst(__m128i data);
public: public:
void CheckInterruptStatus(spu_opcode_t op);
void InterpreterCall(spu_opcode_t op); void InterpreterCall(spu_opcode_t op);
void FunctionCall(); void FunctionCall();

View file

@ -78,6 +78,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
// Minimal position of ila $SP,* instruction // Minimal position of ila $SP,* instruction
u32 ila_sp_pos = max_limit; u32 ila_sp_pos = max_limit;
// pigeonhole optimization: address of the last "ila $2, addr" instruction, or 0 if the previous instruction was not one
u32 ila_r2_addr = 0;
// Find preliminary set of possible block entries (first pass), `start` is the current block address // Find preliminary set of possible block entries (first pass), `start` is the current block address
for (u32 start = entry, pos = entry; pos < limit; pos += 4) for (u32 start = entry, pos = entry; pos < limit; pos += 4)
{ {
@ -174,10 +177,18 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
break; break;
} }
// if the upcoming instruction is not BI, reset the pigeonhole optimization
// todo: can constant propagation somewhere get rid of this check?
if ((type != BI))
ila_r2_addr = 0; // reset
if (type == BI || type == IRET) // Branch Indirect if (type == BI || type == IRET) // Branch Indirect
{ {
blocks.emplace(start); blocks.emplace(start);
start = pos + 4; start = pos + 4;
if (op.ra == 2 && ila_r2_addr > entry)
blocks.emplace(ila_r2_addr);
} }
else if (type == BR || type == BRA) // Branch Relative/Absolute else if (type == BR || type == BRA) // Branch Relative/Absolute
{ {
@ -233,6 +244,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
blocks.emplace(target); blocks.emplace(target);
} }
} }
else if (type == LNOP || type == NOP) {
// there's a chance that there are some random lnops/nops after the end of a function
// haven't found a definite pattern, but this is an easy optimization to check for: just push start down if an lnop is tagged as a start
// todo: remove the last added start pos as it's probably unnecessary
if (pos == start)
start = pos + 4;
}
else // Other instructions (writing rt reg) else // Other instructions (writing rt reg)
{ {
const u32 rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt; const u32 rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
@ -241,9 +259,8 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
if (rt == 0) if (rt == 0)
{ {
} }
// Analyse stack pointer access // Analyse stack pointer access
if (rt == 1) else if (rt == 1)
{ {
if (type == ILA && pos < ila_sp_pos) if (type == ILA && pos < ila_sp_pos)
{ {
@ -251,6 +268,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
ila_sp_pos = pos; ila_sp_pos = pos;
} }
} }
// pigeonhole optimize
// ila r2, addr
// bi r2
else if (rt == 2) {
if (type == ILA)
ila_r2_addr = spu_branch_target(op.i18);
}
} }
} }