From 46d3066c6294a738a51eb7b344047a705de4710c Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Sun, 1 Nov 2020 16:46:06 +0300
Subject: [PATCH] Optimize vm::range_lock

Only test address on `range_locked`
Don't check current transaction
Remove vm::clear_range_locks completely
---
 rpcs3/Emu/Cell/SPUThread.cpp  |  4 +-
 rpcs3/Emu/Memory/vm.cpp       | 72 ++++++++++++++++-------------------
 rpcs3/Emu/Memory/vm_locking.h | 10 ++---
 3 files changed, 39 insertions(+), 47 deletions(-)

diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 098c38c6c4..b4a7108ff6 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2104,8 +2104,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 
 	perf_meter<"DMA_PUT"_u64> perf2;
 
-	// TODO: split range-locked stores in cache lines for consistency
-	auto res = &vm::reservation_acquire(eal, args.size);
+	// Temporarily disabled, may be removed at all
+	atomic_t<u64>* res = nullptr;
 
 	switch (u32 size = args.size)
 	{
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 9ccc2c49be..7f3cf2d275 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -156,7 +156,14 @@ namespace vm
 	{
 		perf_meter<"RHW_LOCK"_u64> perf0;
 
-		while (true)
+		auto _cpu = get_current_cpu_thread();
+
+		if (_cpu)
+		{
+			_cpu->state += cpu_flag::wait + cpu_flag::temp;
+		}
+
+		for (u64 i = 0;; i++)
 		{
 			const u64 lock_val = g_range_lock.load();
 			const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
@@ -166,12 +173,13 @@ namespace vm
 
 			u64 addr = begin;
 
-			if (g_shareable[begin >> 16] || lock_bits == range_sharing)
+			// Only useful for range_locked, and is reliable in this case
+			if (g_shareable[begin >> 16])
 			{
 				addr = addr & 0xffff;
 			}
 
-			if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+			if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
 			{
 				range_lock->store(begin | (u64{size} << 32));
 
@@ -180,18 +188,28 @@ namespace vm
 
 				if (!new_lock_val && !new_res_val) [[likely]]
 				{
-					return;
+					break;
 				}
 
 				if (new_lock_val == lock_val && !new_res_val) [[likely]]
 				{
-					return;
+					break;
 				}
 
 				range_lock->release(0);
 			}
 
-			std::shared_lock lock(g_mutex);
+			std::shared_lock lock(g_mutex, std::try_to_lock);
+
+			if (!lock && i < 15)
+			{
+				busy_wait(200);
+				continue;
+			}
+			else if (!lock)
+			{
+				lock.lock();
+			}
 
 			u32 test = 0;
 
@@ -213,6 +231,14 @@ namespace vm
 				vm::_ref<atomic_t<u32>>(test) += 0;
 				continue;
 			}
+
+			range_lock->release(begin | (u64{size} << 32));
+			break;
+		}
+
+		if (_cpu)
+		{
+			_cpu->check_state();
+		}
 	}
 
@@ -252,36 +278,6 @@ namespace vm
 		return result;
 	}
 
-	void clear_range_locks(u32 addr, u32 size)
-	{
-		ASSUME(size);
-
-		const auto range = utils::address_range::start_length(addr, size);
-
-		// Wait for range locks to clear
-		while (true)
-		{
-			const u64 bads = for_all_range_locks([&](u32 addr2, u32 size2)
-			{
-				ASSUME(size2);
-
-				if (range.overlaps(utils::address_range::start_length(addr2, size2))) [[unlikely]]
-				{
-					return 1;
-				}
-
-				return 0;
-			});
-
-			if (!bads)
-			{
-				return;
-			}
-
-			_mm_pause();
-		}
-	}
-
 	static void _lock_shareable_cache(u64 flags, u32 addr, u32 size)
 	{
@@ -290,10 +286,8 @@ namespace vm
 		// Can't do 512 MiB or more at once
 		if (size >= 512 * 1024 * 1024)
 		{
 			fmt::throw_exception("Failed to lock range (flags=0x%x, addr=0x%x, size=0x%x)" HERE, flags >> 32, addr, size);
 		}
 
-		// Block new range locks
+		// Block or signal new range locks
 		g_range_lock = addr | u64{size} << 35 | flags;
-
-		clear_range_locks(addr, size);
 	}
 
 	void passive_lock(cpu_thread& cpu)
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 279f5a0a7f..824e84ccc6 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -23,7 +23,7 @@ namespace vm
 		/* flag combinations with special meaning */
 
 		range_normal = 3ull << 32, // R+W
-		range_locked = 2ull << 32, // R+W as well but do not
+		range_locked = 2ull << 32, // R+W as well, the only range flag that should block by address
 		range_sharing = 4ull << 32, // Range being registered as shared, flags are unchanged
 		range_allocation = 0, // Allocation, no safe access
 		range_deallocation = 6ull << 32, // Deallocation, no safe access
@@ -52,12 +52,13 @@ namespace vm
 
 		u64 addr = begin;
 
-		if (g_shareable[begin >> 16] || lock_bits == range_sharing)
+		// Only used for range_locked and is reliable in this case
+		if (g_shareable[begin >> 16])
 		{
 			addr = addr & 0xffff;
 		}
 
-		if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+		if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
 		{
 			// Optimistic locking
 			range_lock->store(begin | (u64{size} << 32));
@@ -82,9 +83,6 @@ namespace vm
 		range_lock_internal(res, range_lock, begin, size);
 	}
 
-	// Wait for all range locks to release in specified range
-	void clear_range_locks(u32 addr, u32 size);
-
 	// Release it
 	void free_range_lock(atomic_t<u64>*) noexcept;
 
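
Note (illustrative sketch, not part of the patch): the short program below models the
fast-path address test that vm::range_lock performs after this change, using the packing
visible in the hunks above (address in the low 32 bits, size stored at bit 35, flag bits
at 32..34). The helpers make_range_lock/would_block and the range_full_mask constant are
assumptions made for the example; reservation (res_val) and g_shareable handling are omitted.

#include <cassert>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

constexpr u64 range_locked    = 2ull << 32; // from vm_locking.h above
constexpr u64 range_sharing   = 4ull << 32; // from vm_locking.h above
constexpr u64 range_full_mask = 7ull << 32; // assumed mask covering the three flag bits

// Pack a lock word the same way _lock_shareable_cache does: addr | u64{size} << 35 | flags
constexpr u64 make_range_lock(u32 addr, u32 size, u64 flags)
{
	return addr | u64{size} << 35 | flags;
}

// Hypothetical helper mirroring the new short-circuit in range_lock/range_lock_internal:
// only a range_locked value is tested by address, everything else takes the fast path.
constexpr bool would_block(u64 lock_val, u32 begin, u32 size)
{
	const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
	const u32 lock_size = static_cast<u32>(lock_val >> 35);
	const u64 lock_bits = lock_val & range_full_mask;

	if (lock_bits != range_locked)
	{
		return false;
	}

	// Block only if [begin, begin + size) overlaps [lock_addr, lock_addr + lock_size)
	return !(begin + u64{size} <= lock_addr || begin >= lock_addr + lock_size);
}

int main()
{
	const u64 locked = make_range_lock(0x10000, 0x1000, range_locked);
	assert(would_block(locked, 0x10800, 0x100));  // overlapping reader must wait
	assert(!would_block(locked, 0x20000, 0x100)); // disjoint reader proceeds
	assert(!would_block(make_range_lock(0x10000, 0x1000, range_sharing), 0x10800, 0x100)); // sharing no longer blocks by address
}

This mirrors the `lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size`
condition added in both vm.cpp and vm_locking.h.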