From 8b476b5bfa04d975dc188d7e71dd4a9748a5ffd1 Mon Sep 17 00:00:00 2001
From: Jake
Date: Thu, 30 Nov 2017 20:50:01 -0600
Subject: [PATCH] spu: Recompiler Interrupt optimizations

- Pigeonhole optimize the branching pattern that is used to enable and
  disable interrupts in code; this should lower the number of blocks that
  are compiled and avoid falling out of a block
- Recompile the interrupt check in some cases so execution stays in the
  block instead of falling out to the dispatcher
---
 rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 29 ++++++++++++++++++++++++--
 rpcs3/Emu/Cell/SPUASMJITRecompiler.h   |  1 +
 rpcs3/Emu/Cell/SPUAnalyser.cpp         | 28 +++++++++++++++++++++++--
 3 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
index 14e686873f..3fcb4e6bf9 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@@ -288,6 +288,31 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
 	return XmmConst(v128::fromV(data));
 }
 
+void spu_recompiler::CheckInterruptStatus(spu_opcode_t op)
+{
+	if (op.d)
+		c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
+	else if (op.e) {
+		c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
+		c->mov(*qw0, SPU_OFF_32(ch_event_stat));
+		c->and_(*qw0, SPU_OFF_32(ch_event_mask));
+		c->and_(*qw0, SPU_EVENT_INTR_TEST);
+		c->cmp(*qw0, 0);
+
+		asmjit::Label noInterrupt = c->newLabel();
+		c->je(noInterrupt);
+		c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
+		c->mov(SPU_OFF_32(srr0), *addr);
+		c->mov(SPU_OFF_32(pc), 0);
+
+		FunctionCall();
+
+		c->mov(*addr, SPU_OFF_32(srr0));
+		c->bind(noInterrupt);
+		c->unuse(*qw0);
+	}
+}
+
 void spu_recompiler::InterpreterCall(spu_opcode_t op)
 {
 	auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32
@@ -1013,7 +1038,7 @@ void spu_recompiler::BI(spu_opcode_t op)
 {
 	c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
 	c->and_(*addr, 0x3fffc);
-	if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
+	CheckInterruptStatus(op);
 	c->jmp(*jt);
 }
 
@@ -1037,7 +1062,7 @@ void spu_recompiler::IRET(spu_opcode_t op)
 {
 	c->mov(*addr, SPU_OFF_32(srr0));
 	c->and_(*addr, 0x3fffc);
-	if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
+	CheckInterruptStatus(op);
 	c->jmp(*jt);
 }
 
diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
index f65363a61f..adf8471919 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
@@ -78,6 +78,7 @@ private:
 	asmjit::X86Mem XmmConst(__m128i data);
 
 public:
+	void CheckInterruptStatus(spu_opcode_t op);
 	void InterpreterCall(spu_opcode_t op);
 	void FunctionCall();
 
diff --git a/rpcs3/Emu/Cell/SPUAnalyser.cpp b/rpcs3/Emu/Cell/SPUAnalyser.cpp
index e93abfd151..f9a7931f5f 100644
--- a/rpcs3/Emu/Cell/SPUAnalyser.cpp
+++ b/rpcs3/Emu/Cell/SPUAnalyser.cpp
@@ -78,6 +78,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
 	// Minimal position of ila $SP,* instruction
 	u32 ila_sp_pos = max_limit;
 
+	// Pigeonhole optimization: address from the last "ila $2, addr", or 0 if the previous instruction was not one
+	u32 ila_r2_addr = 0;
+
 	// Find preliminary set of possible block entries (first pass), `start` is the current block address
 	for (u32 start = entry, pos = entry; pos < limit; pos += 4)
 	{
@@ -173,11 +176,19 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
 			limit = pos + 4;
 			break;
 		}
+
+		// If the current instruction is not a BI, reset the pigeonhole optimization
+		// TODO: can constant propagation somewhere get rid of this check?
+		if (type != BI)
+			ila_r2_addr = 0; // reset
 
 		if (type == BI || type == IRET) // Branch Indirect
 		{
 			blocks.emplace(start);
 			start = pos + 4;
+
+			if (op.ra == 2 && ila_r2_addr > entry)
+				blocks.emplace(ila_r2_addr);
 		}
 		else if (type == BR || type == BRA) // Branch Relative/Absolute
 		{
@@ -233,6 +244,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
 				blocks.emplace(target);
 			}
 		}
+		else if (type == LNOP || type == NOP) {
+			// There is a chance that there are some stray lnops/nops after the end of a function
+			// No definite pattern found yet, but this is an easy case to check for: just push start down if an lnop/nop is tagged as a start
+			// TODO: remove the last added start pos as it is probably unnecessary
+			if (pos == start)
+				start = pos + 4;
+		}
 		else // Other instructions (writing rt reg)
 		{
 			const u32 rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
@@ -241,9 +259,8 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
 			if (rt == 0)
 			{
 			}
-
 			// Analyse stack pointer access
-			if (rt == 1)
+			else if (rt == 1)
 			{
 				if (type == ILA && pos < ila_sp_pos)
 				{
@@ -251,6 +268,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
 					ila_sp_pos = pos;
 				}
 			}
+			// Pigeonhole optimize:
+			//   ila r2, addr
+			//   bi r2
+			else if (rt == 2) {
+				if (type == ILA)
+					ila_r2_addr = spu_branch_target(op.i18);
+			}
 		}
 	}
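
For illustration only, a minimal standalone sketch of the "ila $2, addr" / "bi $2" detection that the SPUAnalyser.cpp hunks add; the itype, op_t and find_ila_bi_targets names are simplified stand-ins, not the real RPCS3 decoder types:

#include <cstdint>
#include <set>

// Hypothetical, simplified stand-ins for the SPU opcode decoder.
enum class itype { ILA, BI, OTHER };
struct op_t { itype type; uint32_t rt, ra, i18; };

// Record the statically known target of "ila $2, addr" followed by "bi $2"
// as a block entry, mirroring the first-pass logic added in the patch.
void find_ila_bi_targets(const op_t* ops, uint32_t count, uint32_t entry, std::set<uint32_t>& blocks)
{
	uint32_t ila_r2_addr = 0; // address from the last "ila $2, addr", 0 if stale

	for (uint32_t i = 0; i < count; i++)
	{
		const op_t& op = ops[i];

		// Use the candidate only if the ila was the immediately preceding instruction
		if (op.type == itype::BI && op.ra == 2 && ila_r2_addr > entry)
		{
			blocks.insert(ila_r2_addr);
		}

		// Refresh or invalidate the candidate for the next instruction
		ila_r2_addr = (op.type == itype::ILA && op.rt == 2) ? (op.i18 & 0x3fffc) : 0;
	}
}

In the actual patch this logic is interleaved with the existing first pass: the "type != BI" check clears the candidate, and the "rt == 2" branch records it via spu_branch_target(op.i18).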