From 346a1d4433621db384005eff587e69dceb46dd47 Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Thu, 8 Oct 2020 16:13:55 +0300
Subject: [PATCH] vm: rewrite reservation bits

Implement classic unique/shared locking concept.
Implement vm::reservation_light_op.

---
 rpcs3/Emu/CPU/CPUThread.cpp             |   3 +-
 rpcs3/Emu/CPU/CPUThread.h               |   1 +
 rpcs3/Emu/Cell/Modules/cellSpurs.cpp    |  92 ++++++++--------
 rpcs3/Emu/Cell/Modules/cellSpurs.h      |  16 +++
 rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp |  14 +--
 rpcs3/Emu/Cell/PPUThread.cpp            | 131 +++++++++++++++++-----
 rpcs3/Emu/Cell/SPUThread.cpp            | 137 +++++++++++++-----------
 rpcs3/Emu/Memory/vm.cpp                 |  34 +++++-
 rpcs3/Emu/Memory/vm_reservation.h       |  90 ++++++++++++----
 rpcs3/Emu/RSX/rsx_methods.cpp           |   8 +-
 10 files changed, 356 insertions(+), 170 deletions(-)

diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp
index 604d0429f8..2ad974fb0f 100644
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@@ -338,7 +338,7 @@ void cpu_thread::operator()()
 	{
 		thread_ctrl::set_native_priority(-1);
 	}
-	
+
 	// force input/output denormals to zero for SPU threads (FTZ/DAZ)
 	_mm_setcsr( _mm_getcsr() | 0x8040 );
 
@@ -653,6 +653,7 @@ cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
 
 	for_all_cpu([](cpu_thread* cpu)
 	{
+		// Should be atomic
 		if (!(cpu->state & cpu_flag::pause))
 		{
 			cpu->state += cpu_flag::pause;
diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h
index 853d967eec..0aebb8c345 100644
--- a/rpcs3/Emu/CPU/CPUThread.h
+++ b/rpcs3/Emu/CPU/CPUThread.h
@@ -12,6 +12,7 @@ enum class cpu_flag : u32
 	exit, // Irreversible exit
 	wait, // Indicates waiting state, set by the thread itself
 	pause, // Thread suspended by suspend_all technique
+	pause2, // Used by suspend_all internally
 	suspend, // Thread suspended
 	ret, // Callback return requested
 	signal, // Thread received a signal (HLE)
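
Note: every hunk below migrates from the old per-purpose lock bits (stcx_lockb, dma_lockb, putlluc_lockb) to the unique/shared scheme declared in vm_reservation.h further down. A sketch of how the 64-bit reservation word is divided under this patch (the comments are mine, derived from the enum and the arithmetic in the hunks):

    // bits 7..63 : timestamp; every completed write advances it by 128
    // bit  6     : rsrv_unique_lock - exclusive writers (PUTLLUC, DMA, reservation_op)
    // bits 0..5  : shared writer count - "light" ops enter with fetch_add(1)
    //
    // shared writer: res.fetch_add(1); ...modify...; res += 127;        // net +128
    // unique writer: CAS time -> time | 64; ...modify...; res += 64;    // net +128
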
diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
index f9a34c4a4e..f7a135a26c 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
@@ -2536,9 +2536,10 @@ s32 cellSpursShutdownWorkload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid
 		return CELL_OK;
 	}
 
-	auto [res, rtime] = vm::reservation_lock(vm::get_addr(&spurs->wklEvent(wid)), 1, vm::dma_lockb);
-	const auto old = spurs->wklEvent(wid).fetch_or(1);
-	res.release(rtime + (old & 1 ? 0 : 128));
+	const auto old = vm::reservation_light_op(spurs->wklEvent(wid), [](atomic_t<u8>& v)
+	{
+		return v.fetch_or(1);
+	});
 
 	if (old & 0x12 && !(old & 1) && sys_event_port_send(spurs->eventPort, 0, 0, (1u << 31) >> wid))
 	{
@@ -2693,9 +2694,11 @@ s32 cellSpursReadyCountStore(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid,
 		return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
 	}
 
-	auto [res, rtime] = vm::reservation_lock(spurs.addr(), 128, vm::dma_lockb);
-	spurs->readyCount(wid).release(static_cast<u8>(value));
-	res.store(rtime + 128);
+	vm::reservation_light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v)
+	{
+		v.release(static_cast<u8>(value));
+	});
+
 	return CELL_OK;
 }
 
@@ -2729,11 +2732,11 @@ s32 cellSpursReadyCountSwap(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid,
 		return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
 	}
 
-	auto [res, rtime] = vm::reservation_lock(spurs.addr(), 128, vm::dma_lockb);
-	u32 temp = spurs->readyCount(wid).exchange(static_cast<u8>(swap));
-	res.release(rtime + 128);
+	*old = vm::reservation_light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v)
+	{
+		return v.exchange(static_cast<u8>(swap));
+	});
 
-	*old = temp;
 	return CELL_OK;
 }
 
@@ -2769,9 +2772,10 @@ s32 cellSpursReadyCountCompareAndSwap(ppu_thread& ppu, vm::ptr<CellSpurs> spurs,
 
 	u8 temp = static_cast<u8>(compare);
 
-	auto [res, rtime] = vm::reservation_lock(spurs.addr(), 128, vm::dma_lockb);
-	spurs->readyCount(wid).compare_exchange(temp, static_cast<u8>(swap));
-	res.release(rtime + 128);
+	vm::reservation_light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v)
+	{
+		v.compare_exchange(temp, static_cast<u8>(swap));
+	});
 
 	*old = temp;
 	return CELL_OK;
@@ -2807,17 +2811,15 @@ s32 cellSpursReadyCountAdd(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid, v
 		return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
 	}
 
-	auto [res, rtime] = vm::reservation_lock(spurs.addr(), 128, vm::dma_lockb);
-
-	u32 temp = spurs->readyCount(wid).fetch_op([&](u8& val)
+	*old = vm::reservation_light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v)
 	{
-		const s32 _new = val + value;
-		val = static_cast<u8>(std::clamp(_new, 0, 0xFF));
+		return v.fetch_op([&](u8& val)
+		{
+			const s32 _new = val + value;
+			val = static_cast<u8>(std::clamp(_new, 0, 255));
+		});
 	});
 
-	res.release(rtime + 128);
-
-	*old = temp;
 	return CELL_OK;
 }
 
@@ -3833,13 +3835,12 @@ s32 _spurs::create_task(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id,
 	// TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000
 
 	u32 tmp_task_id;
-	{
-		auto addr = taskset.ptr(&CellSpursTaskset::enabled).addr();
-		auto [res, rtime] = vm::reservation_lock(addr, 16, vm::dma_lockb);
 
+	vm::reservation_light_op(vm::_ref<atomic_be_t<v128>>(taskset.ptr(&CellSpursTaskset::enabled).addr()), [&](atomic_be_t<v128>& ptr)
+	{
 		// NOTE: Realfw processes this using 4 32-bits atomic loops
 		// But here its processed within a single 128-bit atomic op
-		vm::_ref<atomic_be_t<v128>>(addr).fetch_op([&](be_t<v128>& value)
+		ptr.fetch_op([&](be_t<v128>& value)
 		{
 			auto value0 = value.value();
 
@@ -3862,9 +3863,7 @@ s32 _spurs::create_task(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id,
 			tmp_task_id = CELL_SPURS_MAX_TASK;
 			return false;
 		});
-
-		res.release(rtime + 128);
-	}
+	});
 
 	if (tmp_task_id >= CELL_SPURS_MAX_TASK)
 	{
@@ -3885,9 +3884,10 @@ s32 _spurs::create_task(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id,
 
 s32 _spurs::task_start(ppu_thread& ppu, vm::ptr<CellSpursTaskset> taskset, u32 taskId)
 {
-	auto [res, rtime] = vm::reservation_lock(taskset.ptr(&CellSpursTaskset::pending_ready).addr(), 16, vm::dma_lockb);
-	taskset->pending_ready.values[taskId / 32] |= (1u << 31) >> (taskId % 32);
-	res.release(rtime + 128);
+	vm::reservation_light_op(taskset->pending_ready, [&](CellSpursTaskset::atomic_tasks_bitset& v)
+	{
+		v.values[taskId / 32] |= (1u << 31) >> (taskId % 32);
+	});
 
 	auto spurs = +taskset->spurs;
 	ppu_execute<&cellSpursSendWorkloadSignal>(ppu, spurs, +taskset->wid);
@@ -4706,25 +4706,23 @@ s32 cellSpursJobGuardNotify(ppu_thread& ppu, vm::ptr<CellSpursJobGuard> jobGuard
 	if (!jobGuard.aligned())
 		return CELL_SPURS_JOB_ERROR_ALIGN;
 
-	auto [res, rtime] = vm::reservation_lock(jobGuard.addr(), 128, vm::dma_lockb);
-
 	u32 allow_jobchain_run = 0; // Affects cellSpursJobChainRun execution
+	u32 old = 0;
 
-	auto [old, ok] = jobGuard->ncount0.fetch_op([&](be_t<u32>& value)
+	const bool ok = vm::reservation_op(vm::unsafe_ptr_cast<CellSpursJobGuard_x00>(jobGuard), [&](CellSpursJobGuard_x00& jg)
 	{
-		allow_jobchain_run = jobGuard->zero;
+		allow_jobchain_run = jg.zero;
+		old = jg.ncount0;
 
-		if (!value)
+		if (!jg.ncount0)
 		{
 			return false;
 		}
 
-		--value;
+		jg.ncount0--;
 		return true;
 	});
 
-	res.release(rtime + (ok ? 128 : 0));
-
 	if (!ok)
 	{
 		return CELL_SPURS_CORE_ERROR_STAT;
@@ -4759,9 +4757,11 @@ s32 cellSpursJobGuardReset(vm::ptr<CellSpursJobGuard> jobGuard)
 	if (!jobGuard.aligned())
 		return CELL_SPURS_JOB_ERROR_ALIGN;
 
-	auto [res, rtime] = vm::reservation_lock(jobGuard.addr(), 128, vm::dma_lockb);
-	jobGuard->ncount0 = jobGuard->ncount1;
-	res.release(rtime + 128);
+	vm::reservation_light_op(jobGuard->ncount0, [&](atomic_be_t<u32>& ncount0)
+	{
+		ncount0 = jobGuard->ncount1;
+	});
+
 	return CELL_OK;
 }
 
@@ -4844,9 +4844,11 @@ s32 cellSpursJobSetMaxGrab(vm::ptr<CellSpursJobChain> jobChain, u32 maxGrabbedJo
 	if ((spurs->wklEnabled & (0x80000000u >> wid)) == 0u)
 		return CELL_SPURS_JOB_ERROR_STAT;
 
-	auto [res, rtime] = vm::reservation_lock(jobChain.addr(), 128, vm::dma_lockb);
-	jobChain->maxGrabbedJob.release(static_cast<u16>(maxGrabbedJob));
-	res.store(rtime + 128);
+	vm::reservation_light_op(jobChain->maxGrabbedJob, [&](atomic_be_t<u16>& v)
+	{
+		v.release(static_cast<u16>(maxGrabbedJob));
+	});
+
 	return CELL_OK;
 }
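
The conversions above all follow one pattern: a field living inside a reserved 128-byte line is updated through vm::reservation_light_op instead of a full reservation_lock/release pair. A minimal usage sketch (the wrapper function is hypothetical; the API is the one added in vm_reservation.h below):

    // Atomically store a byte that lives in vm memory, bumping the line's timestamp.
    static void store_ready_count(atomic_t<u8>& slot, u8 value)
    {
        vm::reservation_light_op(slot, [&](atomic_t<u8>& v)
        {
            v.release(value); // the lambda runs while the shared lock is held
        });
    }
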
diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.h b/rpcs3/Emu/Cell/Modules/cellSpurs.h
index 4adfff87ae..b602bf97fd 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpurs.h
+++ b/rpcs3/Emu/Cell/Modules/cellSpurs.h
@@ -561,6 +561,22 @@ struct alignas(128) CellSpursJobGuard
 
 CHECK_SIZE_ALIGN(CellSpursJobGuard, 128, 128);
 
+struct alignas(128) CellSpursJobGuard_x00
+{
+	be_t<u32> ncount0; // 0x00
+	be_t<u32> ncount1; // 0x04
+	vm::bptr<CellSpursJobChain> jobChain; // 0x0C
+	be_t<u32> unk0;
+	be_t<u32> requestSpuCount; // 0x10
+	be_t<u32> unk1[3];
+	be_t<u32> autoReset; // 0x20
+	be_t<u32> unk2[3];
+	be_t<u32> zero; // 0x30
+	u8 unk3[0x80 - 0x34];
+};
+
+CHECK_SIZE_ALIGN(CellSpursJobGuard_x00, 128, 128);
+
 // Core CellSpurs structures
 struct alignas(128) CellSpurs
 {
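
CellSpursJobGuard_x00 exists only to give vm::reservation_op a plain 128-byte view of the guard, so the whole cache line can be processed as one atomic operation. If the offset comments are right, they could be pinned down with static_asserts like these (a sketch, not part of the patch):

    static_assert(offsetof(CellSpursJobGuard_x00, ncount1) == 0x04);
    static_assert(offsetof(CellSpursJobGuard_x00, autoReset) == 0x20);
    static_assert(offsetof(CellSpursJobGuard_x00, zero) == 0x30);
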
diff --git a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp
index ebe3f7b0c8..ab4b2f920a 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp
@@ -1431,7 +1431,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 	// Find the number of tasks that have become ready since the last iteration
 	{
 		auto newlyReadyTasks = v128::andnot(ready, signalled | pready);
-		
+
 		// TODO: Optimize this shit with std::popcount when it's known to be fixed
 		for (auto i = 0; i < 128; i++)
 		{
@@ -1597,14 +1597,14 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
 	{
 		auto spurs = kernelCtxt->spurs;
 
-		auto [res, rtime] = vm::reservation_lock(spurs.addr(), 128, vm::dma_lockb);
-		spurs->readyCount(kernelCtxt->wklCurrentId).fetch_op([&](u8& val)
+		vm::reservation_light_op(spurs->readyCount(kernelCtxt->wklCurrentId), [&](atomic_t<u8>& val)
 		{
-			const s32 _new = val + numNewlyReadyTasks;
-			val = static_cast<u8>(std::clamp(_new, 0, 0xFF));
+			val.fetch_op([&](u8& val)
+			{
+				const s32 _new = val + numNewlyReadyTasks;
+				val = static_cast<u8>(std::clamp(_new, 0, 0xFF));
+			});
 		});
-
-		res.release(rtime + 128);
 	}
 
 	return rc;
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 915e9fa5f3..66a49f1340 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -946,7 +946,7 @@ void ppu_thread::fast_call(u32 addr, u32 rtoc)
 
 		if (_this->current_function && vm::read32(cia) != ppu_instructions::SC(0))
 		{
-			return fmt::format("PPU[0x%x] Thread (%s) [HLE:0x%08x, LR:0x%08x]", _this->id, *name_cache.get(), cia, _this->lr); 
+			return fmt::format("PPU[0x%x] Thread (%s) [HLE:0x%08x, LR:0x%08x]", _this->id, *name_cache.get(), cia, _this->lr);
 		}
 
 		return fmt::format("PPU[0x%x] Thread (%s) [0x%08x]", _this->id, *name_cache.get(), cia);
@@ -1103,7 +1103,6 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
 	const u64 data_off = (addr & 7) * 8;
 
 	ppu.raddr = addr;
-	const u64 mask_res = g_use_rtm ? (-128 | vm::dma_lockb) : -1;
 
 	if (const s32 max = g_cfg.core.ppu_128_reservations_loop_max_length)
 	{
@@ -1160,7 +1159,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
 	for (u64 count = 0;; [&]()
 	{
 		if (ppu.state)
-		{ 
+		{
 			ppu.check_state();
 		}
 		else if (++count < 20) [[likely]]
@@ -1175,7 +1174,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
 		}
 	}())
 	{
-		ppu.rtime = vm::reservation_acquire(addr, sizeof(T)) & mask_res;
+		ppu.rtime = vm::reservation_acquire(addr, sizeof(T));
 
 		if (ppu.rtime & 127)
 		{
@@ -1189,7 +1188,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
 			mov_rdata(ppu.rdata, vm::_ref<decltype(ppu.rdata)>(addr & -128));
 		}
 
-		if ((vm::reservation_acquire(addr, sizeof(T)) & mask_res) == ppu.rtime) [[likely]]
+		if (vm::reservation_acquire(addr, sizeof(T)) == ppu.rtime) [[likely]]
 		{
 			if (count >= 15) [[unlikely]]
 			{
@@ -1218,6 +1217,7 @@ const auto ppu_stcx_tx = build_function_asm(addr & -128)))
+			// Obtain unique lock
+			while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
+			{
+				busy_wait(100);
+
+				// Give up if reservation has been updated
+				if ((res & -128) != rtime)
+				{
+					res -= 1;
+					return false;
+				}
+			}
+
+			if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<decltype(ppu.rdata)>(addr & -128)))
 			{
 				data.release(reg_value);
-				res.release(rtime + 128);
+				res += 63;
 				return true;
 			}
 
-			res.release(rtime);
+			res -= (vm::rsrv_unique_lock + 1);
 			return false;
 		}
 
-		if (!vm::reservation_trylock(res, rtime))
+		while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
 		{
+			// Give up if reservation has been updated
+			if ((res & -128) != rtime)
+			{
+				return false;
+			}
+
+			if (ppu.state && ppu.check_state())
+			{
+				return false;
+			}
+			else
+			{
+				busy_wait(100);
+			}
+		}
+
+		if ((res & -128) != rtime)
+		{
+			res -= vm::rsrv_unique_lock;
 			return false;
 		}
 
@@ -1654,24 +1693,64 @@
 		default: break;
 		}
 
-		if (res == rtime && vm::reservation_trylock(res, rtime))
+		if (res.fetch_add(1) & vm::rsrv_unique_lock)
 		{
-			const bool ret = data.compare_and_swap_test(old_data, reg_value);
-			res.release(rtime + 128);
-			return ret;
+			res -= 1;
+			return false;
 		}
 
+		if (data.compare_and_swap_test(old_data, reg_value))
+		{
+			res += 127;
+			return true;
+		}
+
+		res -= 1;
 		return false;
 	}
 
-	if (!vm::reservation_trylock(res, rtime))
+	while (true)
 	{
-		return false;
+		auto [_old, _ok] = res.fetch_op([&](u64& r)
+		{
+			if ((r & -128) != rtime || (r & vm::rsrv_unique_lock))
+			{
+				return false;
+			}
+
+			r += 1;
+			return true;
+		});
+
+		// Give up if reservation has been updated
+		if ((_old & -128) != rtime)
+		{
+			return false;
+		}
+
+		if (_ok)
+		{
+			break;
+		}
+
+		if (ppu.state && ppu.check_state())
+		{
+			return false;
+		}
+		else
+		{
+			busy_wait(100);
+		}
 	}
 
-	const bool ret = data.compare_and_swap_test(old_data, reg_value);
-	res.release(rtime + 128);
-	return ret;
+	if (data.compare_and_swap_test(old_data, reg_value))
+	{
+		res += 127;
+		return true;
+	}
+
+	res -= 1;
+	return false;
 }())
 {
 	res.notify_all();
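
Dropping mask_res works because lock bits and timestamp now share one encoding: any lock activity leaves the low 7 bits nonzero, so the reader simply retries. The load path is essentially a seqlock read side; a self-contained model with std::atomic stand-ins (not emulator code, and it glosses over the formal data race on the unsynchronized copy that a real seqlock must address):

    #include <atomic>
    #include <cstdint>
    #include <cstring>

    struct reserved_line
    {
        std::atomic<uint64_t> time{0}; // low 7 bits: locks, upper bits: timestamp
        unsigned char data[128]{};
    };

    // Returns true if out[] received a consistent snapshot of the line
    bool try_read(const reserved_line& line, unsigned char (&out)[128])
    {
        const uint64_t t0 = line.time.load(std::memory_order_acquire);

        if (t0 & 127)
            return false; // writer in progress, retry later

        std::memcpy(out, line.data, sizeof(out)); // racy copy, validated below
        std::atomic_thread_fence(std::memory_order_acquire);
        return line.time.load(std::memory_order_relaxed) == t0;
    }
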
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 15be3b017f..facba04dd8 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -378,11 +378,11 @@ const auto spu_putllc_tx = build_function_asm(vm::putlluc_lockb));
-	c.jc(fail2);
+	// Lock reservation
+	c.mov(x86::eax, 1);
+	c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
+	c.test(x86::eax, vm::rsrv_unique_lock);
+	c.jnz(fall2);
 
 	build_transaction_enter(c, fall2, x86::r12, 666);
 
@@ -719,16 +725,12 @@ const auto spu_putlluc_tx = build_function_asm(dst) = *reinterpret_cast(src);
-		res.release(time0 + 128);
+		res += 64;
 		break;
 	}
 	case 2:
 	{
-		auto [res, time0] = vm::reservation_lock(eal, 2, vm::dma_lockb);
+		auto [res, time0] = vm::reservation_lock(eal);
 		*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
-		res.release(time0 + 128);
+		res += 64;
 		break;
 	}
 	case 4:
 	{
-		auto [res, time0] = vm::reservation_lock(eal, 4, vm::dma_lockb);
+		auto [res, time0] = vm::reservation_lock(eal);
 		*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
-		res.release(time0 + 128);
+		res += 64;
 		break;
 	}
 	case 8:
 	{
-		auto [res, time0] = vm::reservation_lock(eal, 8, vm::dma_lockb);
+		auto [res, time0] = vm::reservation_lock(eal);
 		*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
-		res.release(time0 + 128);
+		res += 64;
 		break;
 	}
 	default:
 	{
@@ -1463,7 +1465,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 			}
 
 			// Lock each cache line exclusively
-			auto [res, time0] = vm::reservation_lock(eal, size0, vm::dma_lockb);
+			auto [res, time0] = vm::reservation_lock(eal);
 
 			switch (size0)
 			{
@@ -1491,7 +1493,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 				}
 			}
 
-			res.release(time0 + 128);
+			res += 64;
 
 			if (size == size0)
 			{
@@ -1505,7 +1507,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 		if (((eal & 127) + size) <= 128)
 		{
 			// Lock one cache line
-			auto [res, time0] = vm::reservation_lock(eal, 128);
+			auto [res, time0] = vm::reservation_lock(eal);
 
 			while (size)
 			{
@@ -1516,7 +1518,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 				size -= 16;
 			}
 
-			res.release(time0);
+			res += 64;
 
 			break;
 		}
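
The DMA path above takes the unique bit per cache line through vm::reservation_lock and releases with += 64; together with the CAS on entry this still advances the timestamp by a full 128. The accounting, as I read it from the hunks:

    // vm::reservation_lock: CAS  time -> time | 64   (only succeeds when the low 7 bits are 0)
    // ... store up to 128 bytes into the locked line ...
    // res += 64;           // 64 + 64 = 128: lock cleared, timestamp advanced
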
@@ -1848,21 +1850,34 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 
 			cpu_thread::suspend_all cpu_lock(this);
 
-			// Give up if PUTLLUC happened
-			if (res == (rtime | 1))
+			// Obtain unique lock
+			while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
+			{
+				busy_wait(100);
+
+				// Give up if reservation has been updated
+				if ((res & -128) != rtime)
+				{
+					res -= 1;
+					if (render) render->unpause();
+					return false;
+				}
+			}
+
+			if ((res & -128) == rtime)
 			{
 				auto& data = vm::_ref<decltype(rdata)>(addr);
 
 				if (cmp_rdata(rdata, data))
 				{
					mov_rdata(data, to_write);
-					res += 127;
+					res += 63;
 					if (render) render->unpause();
 					return true;
 				}
 			}
 
-			res -= 1;
+			res -= (vm::rsrv_unique_lock | 1);
 			if (render) render->unpause();
 			return false;
 		}
@@ -1872,8 +1887,27 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 		}
 	}
 
-	if (!vm::reservation_trylock(res, rtime))
+	while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
 	{
+		// Give up if reservation has been updated
+		if ((res & -128) != rtime)
+		{
+			return false;
+		}
+
+		if (state && check_state())
+		{
+			return false;
+		}
+		else
+		{
+			busy_wait(100);
+		}
+	}
+
+	if ((res & -128) != rtime)
+	{
+		res -= vm::rsrv_unique_lock;
 		return false;
 	}
 
@@ -1914,7 +1948,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 	if (raddr)
 	{
 		// Last check for event before we clear the reservation
-		if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & (-128 | vm::dma_lockb)) || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
+		if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & -128) || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
 		{
 			set_events(SPU_EVENT_LR);
 		}
@@ -1937,38 +1971,17 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
 
 		if (render) render->pause();
 
-		if (result == 2)
+		if (result == 0)
 		{
 			cpu_thread::suspend_all cpu_lock(cpu);
 
-			if (vm::reservation_acquire(addr, 128) & 64)
-			{
-				// Wait for PUTLLC to complete
-				while (vm::reservation_acquire(addr, 128) & 63)
-				{
-					busy_wait(100);
-				}
-
-				mov_rdata(vm::_ref<decltype(spu_thread::rdata)>(addr), *static_cast<const decltype(spu_thread::rdata)*>(to_write));
-				vm::reservation_acquire(addr, 128) += 64;
-			}
-		}
-		else if (result == 0)
-		{
-			cpu_thread::suspend_all cpu_lock(cpu);
-
-			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::putlluc_lockb)))
-			{
-				busy_wait(100);
-			}
-
-			while (vm::reservation_acquire(addr, 128) & 63)
+			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
 			{
 				busy_wait(100);
 			}
 
 			mov_rdata(vm::_ref<decltype(spu_thread::rdata)>(addr), *static_cast<const decltype(spu_thread::rdata)*>(to_write));
-			vm::reservation_acquire(addr, 128) += 64;
+			vm::reservation_acquire(addr, 128) += 63;
 		}
 
 		if (render) render->unpause();
@@ -1977,7 +1990,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
 	else
 	{
 		auto& data = vm::_ref<decltype(spu_thread::rdata)>(addr);
-		auto [res, time0] = vm::reservation_lock(addr, 128);
+		auto [res, time0] = vm::reservation_lock(addr);
 
 		*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
 
@@ -2247,7 +2260,7 @@ bool spu_thread::process_mfc_cmd()
 		if (raddr && raddr != addr)
 		{
 			// Last check for event before we replace the reservation with a new one
-			if ((vm::reservation_acquire(raddr, 128) & (-128 | vm::dma_lockb)) != rtime || !cmp_rdata(temp, vm::_ref<decltype(rdata)>(raddr)))
+			if ((vm::reservation_acquire(raddr, 128) & -128) != rtime || !cmp_rdata(temp, vm::_ref<decltype(rdata)>(raddr)))
 			{
 				set_events(SPU_EVENT_LR);
 			}
@@ -3583,7 +3596,7 @@ bool spu_thread::capture_local_storage() const
 			if (name.empty())
 			{
 				// TODO: Maybe add thread group name here
-				fmt::append(name, "SPU.%u", lv2_id); 
+				fmt::append(name, "SPU.%u", lv2_id);
 			}
 		}
 		else
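
In the suspend_all fallbacks above the thread already holds a +1 from the failed transaction attempt (the lock xadd in the rewritten asm), which is why success releases with += 63 rather than += 127. My reading of the arithmetic, as comments:

    // held on entry : +1  (lock xadd from the transaction path)
    // acquire       : bts bit 6, i.e. +64
    // publish       : res += 63;                     // 1 + 64 + 63 = 128
    // give up       : res -= (rsrv_unique_lock | 1); // drop both, timestamp untouched
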
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 0251f28dc8..a15b805f0d 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -441,11 +441,11 @@ namespace vm
 		g_mutex.unlock();
 	}
 
-	u64 reservation_lock_internal(u32 addr, atomic_t<u64>& res, u64 lock_bits)
+	u64 reservation_lock_internal(u32 addr, atomic_t<u64>& res)
 	{
 		for (u64 i = 0;; i++)
 		{
-			if (u64 rtime = res; !(rtime & 127) && reservation_trylock(res, rtime, lock_bits)) [[likely]]
+			if (u64 rtime = res; !(rtime & 127) && reservation_try_lock(res, rtime)) [[likely]]
 			{
 				return rtime;
 			}
@@ -471,6 +471,30 @@ namespace vm
 		}
 	}
 
+	void reservation_shared_lock_internal(atomic_t<u64>& res)
+	{
+		for (u64 i = 0;; i++)
+		{
+			if (!(res & rsrv_unique_lock)) [[likely]]
+			{
+				return;
+			}
+
+			if (auto cpu = get_current_cpu_thread(); cpu && cpu->state)
+			{
+				cpu->check_state();
+			}
+			else if (i < 15)
+			{
+				busy_wait(500);
+			}
+			else
+			{
+				std::this_thread::yield();
+			}
+		}
+	}
+
 	void reservation_op_internal(u32 addr, std::function<bool()> func)
 	{
 		const auto _cpu = get_current_cpu_thread();
@@ -481,15 +505,15 @@ namespace vm
 		{
 			cpu_thread::suspend_all cpu_lock(_cpu);
 
-			// Wait to acquire PUTLLUC lock
-			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::putlluc_lockb)))
+			// Wait to acquire unique lock
+			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
 			{
 				busy_wait(100);
 			}
 
 			if (func())
 			{
-				// Success, release PUTLLUC and PUTLLC locks if necessary
+				// Success, release all locks if necessary
 				vm::reservation_acquire(addr, 128) += 63;
 			}
 			else
diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h
index fd593c28db..2b022659f6 100644
--- a/rpcs3/Emu/Memory/vm_reservation.h
+++ b/rpcs3/Emu/Memory/vm_reservation.h
@@ -10,11 +10,11 @@ extern bool g_use_rtm;
 
 namespace vm
 {
-	enum reservation_lock_bit : u64
+	enum : u64
 	{
-		stcx_lockb = 1 << 0, // Exclusive conditional reservation lock
-		dma_lockb = 1 << 5, // Exclusive unconditional reservation lock
-		putlluc_lockb = 1 << 6, // Exclusive unconditional reservation lock
+		rsrv_lock_mask = 127,
+		rsrv_unique_lock = 64,
+		rsrv_shared_mask = 63,
 	};
 
 	// Get reservation status for further atomic update: last update timestamp
@@ -42,11 +42,13 @@ namespace vm
 		return *reinterpret_cast<atomic_t<u64>*>(g_reservations + (addr & 0xff80) / 2);
 	}
 
-	u64 reservation_lock_internal(u32, atomic_t<u64>&, u64);
+	u64 reservation_lock_internal(u32, atomic_t<u64>&);
 
-	inline bool reservation_trylock(atomic_t<u64>& res, u64 rtime, u64 lock_bits = stcx_lockb)
+	void reservation_shared_lock_internal(atomic_t<u64>&);
+
+	inline bool reservation_try_lock(atomic_t<u64>& res, u64 rtime)
 	{
-		if (res.compare_and_swap_test(rtime, rtime + lock_bits)) [[likely]]
+		if (res.compare_and_swap_test(rtime, rtime | rsrv_unique_lock)) [[likely]]
 		{
 			return true;
 		}
@@ -54,16 +56,16 @@ namespace vm
 		return false;
 	}
 
-	inline std::pair<atomic_t<u64>&, u64> reservation_lock(u32 addr, u32 size, u64 lock_bits = stcx_lockb)
+	inline std::pair<atomic_t<u64>&, u64> reservation_lock(u32 addr)
 	{
-		auto res = &vm::reservation_acquire(addr, size);
+		auto res = &vm::reservation_acquire(addr, 1);
 		auto rtime = res->load();
 
-		if (rtime & 127 || !reservation_trylock(*res, rtime, lock_bits)) [[unlikely]]
+		if (rtime & 127 || !reservation_try_lock(*res, rtime)) [[unlikely]]
 		{
 			static atomic_t<u64> no_lock{};
 
-			rtime = reservation_lock_internal(addr, *res, lock_bits);
+			rtime = reservation_lock_internal(addr, *res);
 
 			if (rtime == umax)
 			{
@@ -95,6 +97,7 @@ namespace vm
 
 		// Stage 1: single optimistic transaction attempt
 		unsigned status = _XBEGIN_STARTED;
+		u64 _old = 0;
 
 #ifndef _MSC_VER
 		__asm__ goto ("xbegin %l[stage2];" ::: "memory" : stage2);
@@ -103,6 +106,15 @@ namespace vm
 		if (status == _XBEGIN_STARTED)
 #endif
 		{
+			if (res & rsrv_unique_lock)
+			{
+#ifndef _MSC_VER
+				__asm__ volatile ("xabort $0;" ::: "memory");
+#else
+				_xabort(0);
+#endif
+			}
+
 			if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
 			{
 				res += 128;
@@ -161,10 +173,10 @@ namespace vm
 		}
 
 		// Stage 2: try to lock reservation first
-		res += stcx_lockb;
+		_old = res.fetch_add(1);
 
 		// Start lightened transaction (TODO: tweaking)
-		while (true)
+		while (!(_old & rsrv_unique_lock))
 		{
 #ifndef _MSC_VER
 			__asm__ goto ("xbegin %l[retry];" ::: "memory" : retry);
@@ -263,11 +275,8 @@ namespace vm
 			}
 		}
 
-		// Perform under heavyweight lock
-		auto& res = vm::reservation_acquire(addr, 128);
-
-		res += stcx_lockb;
+		// Perform heavyweight lock
+		auto [res, rtime] = vm::reservation_lock(addr);
 
 		// Write directly if the op cannot fail
 		if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
@@ -294,12 +303,12 @@ namespace vm
 			{
 				// If operation succeeds, write the data back
 				*sptr = buf;
-				res += 127;
+				res.release(rtime + 128);
 			}
 			else
 			{
 				// Operation failed, no memory has been modified
-				res -= 1;
+				res.release(rtime);
 				return std::invoke_result_t<F, T&>();
 			}
 		}
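
The light op added below enters with fetch_add(1) first and only then checks the unique bit, so a unique locker using reservation_try_lock can never miss an in-flight shared writer (the +1 keeps the low 7 bits nonzero). The reverse direction is safe because the bts-based unique fallbacks run under cpu_thread::suspend_all, so no guest thread can be parked mid-light-op. The acquire side in a sketch (names from this patch):

    if (res.fetch_add(1) & vm::rsrv_unique_lock) [[unlikely]]
    {
        // A unique writer owns the line: spin until bit 6 clears,
        // keeping our +1 so that late CAS-based unique lockers back off.
        vm::reservation_shared_lock_internal(res);
    }
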
@@ -363,4 +372,45 @@ namespace vm
 			}
 		}
 	}
+
+	template <bool Ack = false, typename T, typename F>
+	SAFE_BUFFERS inline auto reservation_light_op(T& data, F op)
+	{
+		// Optimized real ptr -> vm ptr conversion, simply UB if out of range
+		const u32 addr = static_cast<u32>(reinterpret_cast<const u8*>(&data) - g_base_addr);
+
+		// Use "super" pointer to prevent access violation handling during atomic op
+		const auto sptr = vm::get_super_ptr<T>(addr);
+
+		// "Lock" reservation
+		auto& res = vm::reservation_acquire(addr, 128);
+
+		if (res.fetch_add(1) & vm::rsrv_unique_lock) [[unlikely]]
+		{
+			vm::reservation_shared_lock_internal(res);
+		}
+
+		if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
+		{
+			std::invoke(op, *sptr);
+			res += 127;
+
+			if constexpr (Ack)
+			{
+				res.notify_all();
+			}
+		}
+		else
+		{
+			auto result = std::invoke(op, *sptr);
+			res += 127;
+
+			if constexpr (Ack)
+			{
+				res.notify_all();
+			}
+
+			return result;
+		}
+	}
 } // namespace vm
diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index 007bee6f5c..1f0318ea52 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -151,14 +151,14 @@ namespace rsx
 			// TODO: Check if possible to write on reservations
 			if (!g_use_rtm && rsx->label_addr >> 28 != addr >> 28) [[likely]]
 			{
-				res = &vm::reservation_lock(addr, 4).first;
+				res = &vm::reservation_lock(addr).first;
 			}
 
 			vm::_ref(addr).val = arg;
 
 			if (res)
 			{
-				res->release(*res + 127);
+				*res += 127;
 			}
 
 			vm::reservation_notifier(addr, 4).notify_all();
@@ -818,7 +818,7 @@ namespace rsx
 		case CELL_GCM_FUNC_ADD_SIGNED:
 		case CELL_GCM_FUNC_REVERSE_ADD_SIGNED:
 			break;
-		
+
 		default:
 		{
 			// Ignore invalid values as a whole
@@ -1513,7 +1513,7 @@ namespace rsx
 			const auto data_length = in_pitch * (line_count - 1) + line_length;
 
 			rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
-			
+
 			if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer);
 				result == rsx::result_zcull_intr)
 			{
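
For reference, the whole locking scheme can be modeled in isolation. The sketch below uses std::atomic and invented helper names; it is not emulator code, only a distillation of the arithmetic used throughout this patch:

    #include <atomic>
    #include <cstdint>

    using rsrv_word = std::atomic<uint64_t>;
    constexpr uint64_t unique_bit = 64; // vm::rsrv_unique_lock

    // Shared ("light") writer: bits 0..5 count concurrent holders
    inline uint64_t shared_acquire(rsrv_word& r) { return r.fetch_add(1); } // caller checks bit 6 in the result
    inline void shared_publish(rsrv_word& r) { r.fetch_add(127); }          // +1 + 127 = +128 (timestamp bump)
    inline void shared_abort(rsrv_word& r) { r.fetch_sub(1); }

    // Unique writer: expected must be a timestamp with all low 7 bits clear
    inline bool unique_try_acquire(rsrv_word& r, uint64_t expected)
    {
        return r.compare_exchange_strong(expected, expected | unique_bit);
    }
    inline void unique_publish(rsrv_word& r) { r.fetch_add(64); }           // 64 + 64 = +128
    inline void unique_abort(rsrv_word& r) { r.fetch_sub(unique_bit); }
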