diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index c8baea5c95..59b9c69177 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -46,14 +46,6 @@ void spu_recompiler::init() } } -spu_function_t spu_recompiler::get(u32 lsa) -{ - init(); - - // Simple atomic read - return m_spurt->m_dispatcher[lsa / 4]; -} - spu_function_t spu_recompiler::compile(std::vector&& func_rv) { init(); @@ -955,7 +947,9 @@ void spu_recompiler::branch_fixed(u32 target) return; } - c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&spu_thread::jit_dispatcher) + target * 2)); + c->mov(x86::rax, imm_ptr(spu_runtime::g_dispatcher + target / 4)); + c->mov(x86::rax, x86::qword_ptr(x86::rax)); + c->mov(SPU_OFF_32(pc), target); c->cmp(SPU_OFF_32(state), 0); c->jnz(label_stop); @@ -1038,7 +1032,8 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga) { // Simply external call (return or indirect call) - c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher))); + c->mov(x86::r10, imm_ptr(spu_runtime::g_dispatcher)); + c->mov(x86::r10, x86::qword_ptr(x86::r10, addr->r64(), 1, 0)); } else { @@ -1057,7 +1052,8 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) c->lea(x86::r10, x86::qword_ptr(instr_table)); c->cmp(qw1->r32(), end - start); c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0)); - c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher))); + c->mov(*qw1, imm_ptr(spu_runtime::g_dispatcher)); + c->lea(*qw1, x86::qword_ptr(*qw1, addr->r64(), 1, 0)); c->cmovae(x86::r10, *qw1); c->mov(x86::r10, x86::qword_ptr(x86::r10)); } diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index 178c1a943a..b98b140a70 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -15,8 +15,6 @@ public: virtual void init() override; - virtual spu_function_t get(u32 lsa) override; - virtual spu_function_t compile(std::vector&&) override; private: diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index e52b923930..7db917ef9f 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -23,6 +23,19 @@ const spu_decoder s_spu_iname; extern u64 get_timebased_time(); +DECLARE(spu_runtime::g_dispatcher) = [] +{ + const auto ptr = reinterpret_cast(jit_runtime::alloc(0x10000 * sizeof(void*), 8, false)); + + // Initialize lookup table + for (u32 i = 0; i < 0x10000; i++) + { + ptr[i].raw() = &spu_recompiler_base::dispatch; + } + + return ptr; +}(); + spu_cache::spu_cache(const std::string& loc) : m_file(loc, fs::read + fs::write + fs::create + fs::append) { @@ -231,12 +244,6 @@ void spu_cache::initialize() spu_runtime::spu_runtime() { - // Initialize lookup table - for (auto& v : m_dispatcher) - { - v.raw() = &spu_recompiler_base::dispatch; - } - // Initialize "empty" block m_map[std::vector()] = &spu_recompiler_base::dispatch; @@ -275,7 +282,7 @@ void spu_runtime::add(std::pair, spu_function_t>& where, if (size0 == 1) { - m_dispatcher[func[0] / 4] = compiled; + g_dispatcher[func[0] / 4] = compiled; } else { @@ -516,7 +523,7 @@ void spu_runtime::add(std::pair, spu_function_t>& where, } } - m_dispatcher[func[0] / 4] = reinterpret_cast(reinterpret_cast(wxptr)); + g_dispatcher[func[0] / 4] = reinterpret_cast(reinterpret_cast(wxptr)); } lock.unlock(); @@ -543,17 +550,8 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) #endif } - const auto func = spu.jit->get(spu.pc); - - // First attempt (load new trampoline and retry) - if (func != spu.jit_dispatcher[spu.pc / 4]) - { - spu.jit_dispatcher[spu.pc / 4] = func; - return; - } - // Second attempt (recover from the recursion after repeated unsuccessful trampoline call) - if (spu.block_counter != spu.block_recover && func != &dispatch) + if (spu.block_counter != spu.block_recover && &dispatch != spu_runtime::g_dispatcher[spu.pc / 4]) { spu.block_recover = spu.block_counter; return; @@ -561,7 +559,6 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) // Compile verify(HERE), spu.jit->compile(spu.jit->block(spu._ptr(0), spu.pc)); - spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc); // Diagnostic if (g_cfg.core.spu_block_size == spu_block_size_type::giga) @@ -579,7 +576,6 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip) { // Compile (TODO: optimize search of the existing functions) const auto func = verify(HERE, spu.jit->compile(spu.jit->block(spu._ptr(0), spu.pc))); - spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc); // Overwrite jump to this function with jump to the compiled function const s64 rel = reinterpret_cast(func) - reinterpret_cast(rip) - 5; @@ -1989,9 +1985,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto result = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->SetInsertPoint(result); m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc)); - const auto addr = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher) + target * 2)); const auto type = llvm::FunctionType::get(get_type(), {get_type(), get_type(), get_type()}, false)->getPointerTo()->getPointerTo(); - tail(m_ir->CreateLoad(m_ir->CreateBitCast(addr, type))); + tail(m_ir->CreateLoad(m_ir->CreateIntToPtr(m_ir->getInt64((u64)(spu_runtime::g_dispatcher + target / 4)), type))); m_ir->SetInsertPoint(cblock); return result; } @@ -2534,14 +2529,6 @@ public: } } - virtual spu_function_t get(u32 lsa) override - { - init(); - - // Simple atomic read - return m_spurt->m_dispatcher[lsa / 4]; - } - virtual spu_function_t compile(std::vector&& func_rv) override { init(); @@ -5663,7 +5650,7 @@ public: m_ir->CreateStore(addr.value, spu_ptr(&spu_thread::pc)); const auto type = llvm::FunctionType::get(get_type(), {get_type(), get_type(), get_type()}, false)->getPointerTo()->getPointerTo(); - const auto disp = m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher))), type); + const auto disp = m_ir->CreateIntToPtr(m_ir->getInt64((u64)spu_runtime::g_dispatcher), type); const auto ad64 = m_ir->CreateZExt(addr.value, get_type()); if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe) diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index f1c3ad1bd6..10f3cbbced 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -44,9 +44,6 @@ public: // All functions std::map, spu_function_t> m_map; - // All dispatchers - std::array, 0x10000> m_dispatcher; - // Debug module output location std::string m_cache_path; @@ -59,6 +56,9 @@ public: // Add compiled function and generate trampoline if necessary void add(std::pair, spu_function_t>& where, spu_function_t compiled); + + // All dispatchers (array allocated in jit memory) + static atomic_t* const g_dispatcher; }; // SPU Recompiler instance base class @@ -100,9 +100,6 @@ public: // Initialize virtual void init() = 0; - // Get pointer to the trampoline at given position - virtual spu_function_t get(u32 lsa) = 0; - // Compile function virtual spu_function_t compile(std::vector&&) = 0; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 1867b53bce..2a6864f8cc 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -564,7 +564,7 @@ void spu_thread::cpu_task() { while (LIKELY(!state || !check_state())) { - jit_dispatcher[pc / 4](*this, vm::_ptr(offset), nullptr); + spu_runtime::g_dispatcher[pc / 4](*this, vm::_ptr(offset), nullptr); } // Print some stats @@ -696,9 +696,6 @@ spu_thread::spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::stri if (g_cfg.core.spu_decoder != spu_decoder_type::fast && g_cfg.core.spu_decoder != spu_decoder_type::precise) { - // Initialize lookup table - jit_dispatcher.fill(&spu_recompiler_base::dispatch); - if (g_cfg.core.spu_block_size != spu_block_size_type::safe) { // Initialize stack mirror @@ -1340,7 +1337,7 @@ bool spu_thread::process_mfc_cmd() while (true) { result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()); - + if (result < 2) { break; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 4bc071af0d..47dc7cbe04 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -584,8 +584,6 @@ public: u64 block_recover = 0; u64 block_failure = 0; - std::array jit_dispatcher; // Dispatch table for indirect calls - std::array stack_mirror; // Return address information void push_snr(u32 number, u32 value);