From 525453794fddbbdbadeac943aa2302739bc00729 Mon Sep 17 00:00:00 2001 From: Eladash Date: Fri, 8 May 2020 20:41:15 +0300 Subject: [PATCH] SPU/PPU reservations: Optimizations part 1 - Implement vm::reservation_trylock, optimized locking on reservation stores with no waiting. Always fail if reservation lock bits are set. - Make SPU accurate GET transfers on non-TSX not modify reservation lock bits. - Add some optimizations for reservation writes of unmodified data. --- rpcs3/Emu/Cell/PPUThread.cpp | 45 ++++--- rpcs3/Emu/Cell/SPUThread.cpp | 208 +++++++++++++++++++----------- rpcs3/Emu/Memory/vm_reservation.h | 10 ++ 3 files changed, 169 insertions(+), 94 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index def044e1c1..56d6a1f162 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1214,10 +1214,21 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value) constexpr u64 size_off = (sizeof(T) * 8) & 63; const T old_data = static_cast(ppu.rdata << ((addr & 7) * 8) >> size_off); + auto& res = vm::reservation_acquire(addr, sizeof(T)); - if (ppu.raddr != addr || addr % sizeof(T) || old_data != data.load() || ppu.rtime != (vm::reservation_acquire(addr, sizeof(T)) & -128)) + if (std::exchange(ppu.raddr, 0) != addr || addr % sizeof(T) || old_data != data || ppu.rtime != res) { - ppu.raddr = 0; + return false; + } + + if (reg_value == old_data) + { + if (res.compare_and_swap_test(ppu.rtime, ppu.rtime + 128)) + { + res.notify_all(); + return true; + } + return false; } @@ -1230,27 +1241,21 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value) case 0: { // Reservation lost - ppu.raddr = 0; return false; } case 1: { - vm::reservation_notifier(addr, sizeof(T)).notify_all(); - ppu.raddr = 0; + res.notify_all(); return true; } } - auto& res = vm::reservation_acquire(addr, sizeof(T)); - - ppu.raddr = 0; - - if (res == ppu.rtime && res.compare_and_swap_test(ppu.rtime, 
ppu.rtime | 1)) + if (res == ppu.rtime && vm::reservation_trylock(res, ppu.rtime)) { if (data.compare_and_swap_test(old_data, reg_value)) { res += 127; - vm::reservation_notifier(addr, sizeof(T)).notify_all(); + res.notify_all(); return true; } @@ -1260,25 +1265,23 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value) return false; } - vm::passive_unlock(ppu); + if (!vm::reservation_trylock(res, ppu.rtime)) + { + return false; + } - auto& res = vm::reservation_lock(addr, sizeof(T)); - const u64 old_time = res.load() & -128; - - const bool result = ppu.rtime == old_time && data.compare_and_swap_test(old_data, reg_value); + const bool result = data.compare_and_swap_test(old_data, reg_value); if (result) { - res.release(old_time + 128); - vm::reservation_notifier(addr, sizeof(T)).notify_all(); + res.release(ppu.rtime + 128); + res.notify_all(); } else { - res.release(old_time); + res.release(ppu.rtime); } - vm::passive_lock(ppu); - ppu.raddr = 0; return result; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index c41452ad18..7d64cd8625 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1251,6 +1251,77 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) if (!g_use_rtm && (!is_get || g_cfg.core.spu_accurate_putlluc)) [[unlikely]] { + if (const u32 size = args.size; ((eal & 127) + size) <= 128 && is_get) + { + for (u64 i = 0;; [&]() + { + if (++i < 25) [[likely]] + { + busy_wait(300); + } + else + { + std::this_thread::yield(); + } + }()) + { + const u64 time0 = vm::reservation_acquire(eal, size); + + if (time0 & 1) + { + continue; + } + + switch (size) + { + case 1: + { + *reinterpret_cast(dst) = *reinterpret_cast(src); + break; + } + case 2: + { + *reinterpret_cast(dst) = *reinterpret_cast(src); + break; + } + case 4: + { + *reinterpret_cast(dst) = *reinterpret_cast(src); + break; + } + case 8: + { + *reinterpret_cast(dst) = *reinterpret_cast(src); + break; + } + default: + { + 
auto _dst = dst; + auto _src = src; + auto _size = size; + + while (_size) + { + *reinterpret_cast(_dst) = *reinterpret_cast(_src); + + _dst += 16; + _src += 16; + _size -= 16; + } + + break; + } + } + + if (time0 != vm::reservation_acquire(eal, size)) + { + continue; + } + + return; + } + } + switch (u32 size = args.size) { case 1: @@ -1592,7 +1663,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args) if (raddr && addr == raddr) { // Last check for event before we clear the reservation - if ((vm::reservation_acquire(addr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref(addr))) + if (vm::reservation_acquire(addr, 128) != rtime || !cmp_rdata(rdata, vm::_ref(addr))) { ch_event_stat |= SPU_EVENT_LR; } @@ -1808,12 +1879,6 @@ bool spu_thread::process_mfc_cmd() const u32 addr = ch_mfc_cmd.eal & -128; const auto& data = vm::_ref(addr); - if (addr == raddr && !g_use_rtm && g_cfg.core.spu_loop_detection && rtime == vm::reservation_acquire(addr, 128) && cmp_rdata(rdata, data)) - { - // Spinning, might as well yield cpu resources - std::this_thread::yield(); - } - auto& dst = _ref(ch_mfc_cmd.lsa & 0x3ff80); u64 ntime; @@ -1873,7 +1938,7 @@ bool spu_thread::process_mfc_cmd() if (raddr && raddr != addr) { // Last check for event before we replace the reservation with a new one - if ((vm::reservation_acquire(raddr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref(raddr))) + if (vm::reservation_acquire(raddr, 128) != rtime || !cmp_rdata(rdata, vm::_ref(raddr))) { ch_event_stat |= SPU_EVENT_LR; } @@ -1899,93 +1964,90 @@ bool spu_thread::process_mfc_cmd() { // Store conditionally const u32 addr = ch_mfc_cmd.eal & -128; - u32 result = 0; - if (raddr == addr) + if ([&]() { + if (raddr != addr) + { + return false; + } + const auto& to_write = _ref(ch_mfc_cmd.lsa & 0x3ff80); + auto& res = vm::reservation_acquire(addr, 128); + + if (!g_use_rtm && rtime != res) + { + return false; + } + + if (cmp_rdata(to_write, rdata)) + { + // Writeback of unchanged data. 
Only check memory change + return cmp_rdata(rdata, vm::_ref(addr)) && res.compare_and_swap_test(rtime, rtime + 128); + } if (g_use_rtm) [[likely]] { - result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()); - - if (result == 2) + switch (spu_putllc_tx(addr, rtime, rdata.data(), to_write.data())) + { + case 2: { - result = 0; - cpu_thread::suspend_all cpu_lock(this); // Give up if PUTLLUC happened - if (vm::reservation_acquire(addr, 128) == (rtime | 1)) + if (res == (rtime | 1)) { auto& data = vm::_ref(addr); - if ((vm::reservation_acquire(addr, 128) & -128) == rtime && cmp_rdata(rdata, data)) + if (cmp_rdata(rdata, data)) { mov_rdata(data, to_write); - vm::reservation_acquire(addr, 128) += 127; - result = 1; - } - else - { - vm::reservation_acquire(addr, 128) -= 1; + res += 127; + return true; } } - else - { - vm::reservation_acquire(addr, 128) -= 1; - } + + res -= 1; + return false; + } + case 1: return true; + case 0: return false; + default: ASSUME(0); } } - else if (auto& data = vm::_ref(addr); rtime == (vm::reservation_acquire(raddr, 128) & -128)) + + if (!vm::reservation_trylock(res, rtime)) { - if (cmp_rdata(rdata, to_write)) - { - // Writeback of unchanged data. 
Only check memory change - result = cmp_rdata(rdata, data) && vm::reservation_acquire(raddr, 128).compare_and_swap_test(rtime, rtime + 128); - } - else - { - auto& res = vm::reservation_lock(raddr, 128); - const u64 old_time = res.load() & -128; - - if (rtime == old_time) - { - *reinterpret_cast*>(&data) += 0; - - const auto render = get_rsx_if_needs_res_pause(addr); - - if (render) render->pause(); - - auto& super_data = *vm::get_super_ptr(addr); - { - // Full lock (heavyweight) - // TODO: vm::check_addr - vm::writer_lock lock(addr); - - if (cmp_rdata(rdata, super_data)) - { - mov_rdata(super_data, to_write); - res.release(old_time + 128); - result = 1; - } - else - { - res.release(old_time); - } - } - - if (render) render->unpause(); - } - else - { - res.release(old_time); - } - } + return false; } - } - if (result) + vm::_ref>(addr) += 0; + + const auto render = get_rsx_if_needs_res_pause(addr); + + if (render) render->pause(); + + auto& super_data = *vm::get_super_ptr(addr); + const bool success = [&]() + { + // Full lock (heavyweight) + // TODO: vm::check_addr + vm::writer_lock lock(addr); + + if (cmp_rdata(rdata, super_data)) + { + mov_rdata(super_data, to_write); + res.release(rtime + 128); + return true; + } + + res.release(rtime); + return false; + }(); + + if (render) render->unpause(); + return success; + }()) { vm::reservation_notifier(addr, 128).notify_all(); ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS); diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h index 3c15d40447..bcb0b54dc8 100644 --- a/rpcs3/Emu/Memory/vm_reservation.h +++ b/rpcs3/Emu/Memory/vm_reservation.h @@ -40,4 +40,14 @@ namespace vm return res; } + inline bool reservation_trylock(atomic_t& res, u64 rtime) + { + if (res.compare_and_swap_test(rtime, rtime | 1)) [[likely]] + { + return true; + } + + return false; + } + } // namespace vm