diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index c34a0ab203..5f2c2ab6c0 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -944,7 +944,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr) ppu.raddr = addr; - while (g_use_rtm) + while (LIKELY(g_use_rtm)) { ppu.rtime = vm::reservation_acquire(addr, sizeof(T)); ppu.rdata = data; @@ -959,30 +959,46 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr) } } - // Do several attemps - for (uint i = 0; i < 5; i++) + ppu.rtime = vm::reservation_acquire(addr, sizeof(T)); + + if (LIKELY((ppu.rtime & 1) == 0)) + { + ppu.rdata = data; + + if (LIKELY(vm::reservation_acquire(addr, sizeof(T)) == ppu.rtime)) + { + return static_cast(ppu.rdata); + } + } + + vm::temporary_unlock(ppu); + + for (u64 i = 0;; i++) { ppu.rtime = vm::reservation_acquire(addr, sizeof(T)); - _mm_lfence(); - // Check LSB: atomic store may be in progress if (LIKELY((ppu.rtime & 1) == 0)) { ppu.rdata = data; - _mm_lfence(); if (LIKELY(vm::reservation_acquire(addr, sizeof(T)) == ppu.rtime)) { - return static_cast(ppu.rdata); + break; } } - busy_wait(300); + if (i < 20) + { + busy_wait(300); + } + else + { + std::this_thread::yield(); + } } - vm::reader_lock lock; - ppu.rtime = vm::reservation_acquire(addr, sizeof(T)); - ppu.rdata = data; + ppu.cpu_mem(); + return static_cast(ppu.rdata); } @@ -1053,7 +1069,7 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value) return false; } - if (g_use_rtm) + if (LIKELY(g_use_rtm)) { if (ppu_stwcx_tx(addr, ppu.rtime, ppu.rdata, reg_value)) { @@ -1067,16 +1083,23 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value) return false; } - vm::writer_lock lock(0); + vm::temporary_unlock(ppu); - const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast(ppu.rdata), reg_value); + auto& res = vm::reservation_lock(addr, sizeof(u32)); + + const bool result = ppu.rtime == (res & ~1ull) && data.compare_and_swap_test(static_cast(ppu.rdata), reg_value); if (result) { vm::reservation_update(addr, sizeof(u32)); vm::reservation_notifier(addr, sizeof(u32)).notify_all(); } + else + { + res &= ~1ull; + } + ppu.cpu_mem(); ppu.raddr = 0; return result; } @@ -1138,7 +1161,7 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value) return false; } - if (g_use_rtm) + if (LIKELY(g_use_rtm)) { if (ppu_stdcx_tx(addr, ppu.rtime, ppu.rdata, reg_value)) { @@ -1152,16 +1175,23 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value) return false; } - vm::writer_lock lock(0); + vm::temporary_unlock(ppu); - const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value); + auto& res = vm::reservation_lock(addr, sizeof(u64)); + + const bool result = ppu.rtime == (res & ~1ull) && data.compare_and_swap_test(ppu.rdata, reg_value); if (result) { vm::reservation_update(addr, sizeof(u64)); vm::reservation_notifier(addr, sizeof(u64)).notify_all(); } + else + { + res &= ~1ull; + } + ppu.cpu_mem(); ppu.raddr = 0; return result; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 0f9c938d23..8c8fbe1200 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -806,6 +806,79 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) void* dst = vm::base(eal); void* src = vm::base(offset + lsa); + if (UNLIKELY(!is_get && !g_use_rtm)) + { + switch (u32 size = args.size) + { + case 1: + { + auto& res = vm::reservation_lock(eal, 1); + *static_cast(dst) = *static_cast(src); + res &= ~1ull; + break; + } + case 2: + { + auto& res = vm::reservation_lock(eal, 2); + *static_cast(dst) = *static_cast(src); + res &= ~1ull; + break; + } + case 4: + { + auto& res = vm::reservation_lock(eal, 4); + *static_cast(dst) = *static_cast(src); + res &= ~1ull; + break; + } + case 8: + { + auto& res = vm::reservation_lock(eal, 8); + *static_cast(dst) = *static_cast(src); + res &= ~1ull; + break; + } + case 16: + { + auto& res = vm::reservation_lock(eal, 16); + _mm_store_si128(static_cast<__m128i*>(dst), _mm_load_si128(static_cast(src))); + res &= ~1ull; + break; + } + default: + { + auto* res = &vm::reservation_lock(eal, 16); + auto vdst = static_cast<__m128i*>(dst); + auto vsrc = static_cast(src); + + for (u32 addr = eal, end = eal + size;; vdst++, vsrc++) + { + _mm_store_si128(vdst, _mm_load_si128(vsrc)); + + addr += 16; + + if (addr == end) + { + break; + } + + if (addr % 128) + { + continue; + } + + res->fetch_and(~1ull); + res = &vm::reservation_lock(addr, 16); + } + + res->fetch_and(~1ull); + break; + } + } + + return; + } + if (is_get) { std::swap(dst, src); @@ -825,24 +898,17 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) } case 4: { - //if (is_get && !from_mfc) - { - *static_cast(dst) = *static_cast(src); - break; - } - - //_mm_stream_si32(static_cast(dst), *static_cast(src)); + *static_cast(dst) = *static_cast(src); break; } case 8: { - //if (is_get && !from_mfc) - { - *static_cast(dst) = *static_cast(src); - break; - } - - //_mm_stream_si64(static_cast(dst), *static_cast(src)); + *static_cast(dst) = *static_cast(src); + break; + } + case 16: + { + _mm_store_si128(static_cast<__m128i*>(dst), _mm_load_si128(static_cast(src))); break; } default: @@ -851,45 +917,6 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) auto vsrc = static_cast(src); auto vcnt = size / sizeof(__m128i); - //if (is_get && !from_mfc) - { - while (vcnt >= 8) - { - const __m128i data[] - { - _mm_load_si128(vsrc + 0), - _mm_load_si128(vsrc + 1), - _mm_load_si128(vsrc + 2), - _mm_load_si128(vsrc + 3), - _mm_load_si128(vsrc + 4), - _mm_load_si128(vsrc + 5), - _mm_load_si128(vsrc + 6), - _mm_load_si128(vsrc + 7), - }; - - _mm_store_si128(vdst + 0, data[0]); - _mm_store_si128(vdst + 1, data[1]); - _mm_store_si128(vdst + 2, data[2]); - _mm_store_si128(vdst + 3, data[3]); - _mm_store_si128(vdst + 4, data[4]); - _mm_store_si128(vdst + 5, data[5]); - _mm_store_si128(vdst + 6, data[6]); - _mm_store_si128(vdst + 7, data[7]); - - vcnt -= 8; - vsrc += 8; - vdst += 8; - } - - while (vcnt--) - { - _mm_store_si128(vdst++, _mm_load_si128(vsrc++)); - } - - break; - } - - // Disabled while (vcnt >= 8) { const __m128i data[] @@ -904,14 +931,14 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) _mm_load_si128(vsrc + 7), }; - _mm_stream_si128(vdst + 0, data[0]); - _mm_stream_si128(vdst + 1, data[1]); - _mm_stream_si128(vdst + 2, data[2]); - _mm_stream_si128(vdst + 3, data[3]); - _mm_stream_si128(vdst + 4, data[4]); - _mm_stream_si128(vdst + 5, data[5]); - _mm_stream_si128(vdst + 6, data[6]); - _mm_stream_si128(vdst + 7, data[7]); + _mm_store_si128(vdst + 0, data[0]); + _mm_store_si128(vdst + 1, data[1]); + _mm_store_si128(vdst + 2, data[2]); + _mm_store_si128(vdst + 3, data[3]); + _mm_store_si128(vdst + 4, data[4]); + _mm_store_si128(vdst + 5, data[5]); + _mm_store_si128(vdst + 6, data[6]); + _mm_store_si128(vdst + 7, data[7]); vcnt -= 8; vsrc += 8; @@ -920,15 +947,11 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) while (vcnt--) { - _mm_stream_si128(vdst++, _mm_load_si128(vsrc++)); + _mm_store_si128(vdst++, _mm_load_si128(vsrc++)); } + break; } } - - if (is_get) - { - //_mm_sfence(); - } } bool SPUThread::do_dma_check(const spu_mfc_cmd& args) @@ -1048,7 +1071,7 @@ void SPUThread::do_putlluc(const spu_mfc_cmd& args) const auto to_write = _ref(args.lsa & 0x3ffff); // Store unconditionally - if (g_use_rtm) + if (LIKELY(g_use_rtm)) { const u64 count = spu_putlluc_tx(addr, to_write.data()); @@ -1056,17 +1079,14 @@ void SPUThread::do_putlluc(const spu_mfc_cmd& args) { LOG_ERROR(SPU, "%s took too long: %u", args.cmd, count); } - - vm::reservation_notifier(addr, 128).notify_all(); - return; + } + else + { + auto& res = vm::reservation_lock(addr, 128); + data = to_write; + vm::reservation_update(addr, 128); } - vm::writer_lock lock(0); - vm::reservation_update(addr, 128, true); - _mm_sfence(); - data = to_write; - _mm_sfence(); - vm::reservation_update(addr, 128); vm::reservation_notifier(addr, 128).notify_all(); } @@ -1251,7 +1271,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) } } - if (g_use_rtm) + if (LIKELY(g_use_rtm)) { const u64 count = spu_getll_tx(raddr, rdata.data(), &rtime); @@ -1260,36 +1280,12 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) LOG_ERROR(SPU, "%s took too long: %u", args.cmd, count); } } - - // Do several attemps - for (uint i = 0; !g_use_rtm && i < 5; i++) + else { - rtime = vm::reservation_acquire(raddr, 128); - _mm_lfence(); - - // Check LSB: atomic store may be in progress - if (LIKELY((rtime & 1) == 0)) - { - rdata = data; - _mm_lfence(); - - if (LIKELY(vm::reservation_acquire(raddr, 128) == rtime)) - { - // Copy to LS - _ref(args.lsa & 0x3ffff) = rdata; - ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); - return true; - } - } - - busy_wait(300); - } - - if (!g_use_rtm) - { - vm::reader_lock lock; - rtime = vm::reservation_acquire(raddr, 128); + auto& res = vm::reservation_lock(raddr, 128); + rtime = res & ~1ull; rdata = data; + res &= ~1ull; } // Copy to LS @@ -1308,7 +1304,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) if (raddr == args.eal && rtime == vm::reservation_acquire(raddr, 128)) { - if (g_use_rtm) + if (LIKELY(g_use_rtm)) { if (spu_putllc_tx(raddr, rtime, rdata.data(), to_write.data())) { @@ -1320,20 +1316,24 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) } else if (rdata == data) { + auto& res = vm::reservation_lock(raddr, 128); + + vm::_ref>(raddr) += 0; + // Full lock (heavyweight) // TODO: vm::check_addr vm::writer_lock lock(1); - if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data) + if (rtime == (res & ~1ull) && rdata == data) { - vm::reservation_update(raddr, 128, true); - _mm_sfence(); data = to_write; - _mm_sfence(); - result = true; - vm::reservation_update(raddr, 128); vm::reservation_notifier(raddr, 128).notify_all(); + result = true; + } + else + { + res &= ~1ull; } } } diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 7f9168b5fa..1ba9dc4356 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -9,6 +9,7 @@ #include "Emu/Cell/lv2/sys_memory.h" #include "Emu/RSX/GSRender.h" #include +#include #include static_assert(sizeof(notifier) == 8, "Unexpected size of notifier"); @@ -232,6 +233,26 @@ namespace vm } } + void reservation_lock_internal(atomic_t& res) + { + for (u64 i = 0;; i++) + { + if (LIKELY(!atomic_storage::bts(res.raw(), 0))) + { + break; + } + + if (i < 15) + { + busy_wait(500); + } + else + { + std::this_thread::yield(); + } + } + } + // Page information struct memory_page { @@ -479,12 +500,21 @@ namespace vm , size(size) , flags(flags) { - // Allocate compressed reservation info area (avoid RSX and SPU areas) - if (addr != 0xc0000000 && addr != 0xe0000000) + // Allocate compressed reservation info area (avoid SPU MMIO area) + if (addr != 0xe0000000) { utils::memory_commit(g_reservations + addr / 16, size / 16); utils::memory_commit(g_reservations2 + addr / 16, size / 16); } + else + { + // RawSPU LS + for (u32 i = 0; i < 6; i++) + { + utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000); + utils::memory_commit(g_reservations2 + addr / 16 + i * 0x10000, 0x4000); + } + } } block_t::~block_t() diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h index d11087528c..5c3815d0ad 100644 --- a/rpcs3/Emu/Memory/vm.h +++ b/rpcs3/Emu/Memory/vm.h @@ -105,6 +105,20 @@ namespace vm return *reinterpret_cast(g_reservations2 + addr / 128 * 8); } + void reservation_lock_internal(atomic_t&); + + inline atomic_t& reservation_lock(u32 addr, u32 size) + { + auto& res = vm::reservation_acquire(addr, size); + + if (UNLIKELY(atomic_storage::bts(res.raw(), 0))) + { + reservation_lock_internal(res); + } + + return res; + } + // Change memory protection of specified memory region bool page_protect(u32 addr, u32 size, u8 flags_test = 0, u8 flags_set = 0, u8 flags_clear = 0); diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 948f049f8f..739ac99fa3 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -118,14 +118,15 @@ namespace rsx rsx->sync_point_request = true; const u32 addr = get_address(method_registers.semaphore_offset_406e(), method_registers.semaphore_context_dma_406e()); - if (g_use_rtm || addr >> 28 == 0x4) + if (LIKELY(g_use_rtm)) { vm::write32(addr, arg); } else { - vm::reader_lock lock; + auto& res = vm::reservation_lock(addr, 4); vm::write32(addr, arg); + res &= ~1ull; } if (addr >> 28 != 0x4)