Mirror of https://github.com/RPCS3/rpcs3.git (synced 2025-07-10 17:01:24 +12:00)
SPU/PPU atomics performance and LR event fixes (#5435)
* Fix SPU LR event setting in atomic commands according to hw test
* MFC: increment timestamp for PUT cmd in non-TSX path
* MFC: fix reservation lost test on non-TSX path in regard to the lock bit
* Move reservation notification out of writer_lock scope to reduce its lifetime
* Use passive_lock/unlock in PPU atomic instructions to reduce redundancy
* Lock only once for DMA transfers (non-TSX)
* Don't use RDTSC in reservation update logic
* Remove MFC cmd args passing to process_mfc_cmd
* Reorder check_state cpu_flag::memory check for faster unlocking
* Add a specialization for 128-byte data copies in SPU DMA transfers
* Implement memory range locks and isolate PPU and SPU passive lock logic (see the encoding sketch below)
This commit is contained in:
parent f19fd23227
commit fc92ae4085
9 changed files with 344 additions and 235 deletions
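
Throughout the diff below, a byte range [addr, end) is packed into a single u64 with the end address in the high word and the start address in the low word, then tested against the writer's currently published address. A minimal standalone sketch of that encoding and of the test_addr predicate the hunks use (illustration only, not part of the commit):

    #include <cstdint>

    using u32 = std::uint32_t;
    using u64 = std::uint64_t;

    // Pack [addr, end) the way g_range_locks entries are built below.
    constexpr u64 pack_range(u32 addr, u32 end)
    {
        return u64{end} << 32 | addr;
    }

    // Mirrors test_addr in the diff: true if the range does NOT cover target.
    constexpr bool misses(u64 lock_info, u32 target)
    {
        const u32 addr = static_cast<u32>(lock_info);
        const u32 end  = static_cast<u32>(lock_info >> 32);
        return addr > target || end <= target;
    }

    static_assert(misses(pack_range(0x1000, 0x1080), 0x0fff));  // below range
    static_assert(!misses(pack_range(0x1000, 0x1080), 0x1040)); // inside range
    static_assert(misses(pack_range(0x1000, 0x1080), 0x1080));  // end is exclusive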
@@ -57,8 +57,12 @@ namespace vm
 	// Memory mutex acknowledgement
 	thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr;
 
+	// Currently locked address
+	atomic_t<u32> g_addr_lock = 0;
+
 	// Memory mutex: passive locks
-	std::array<atomic_t<cpu_thread*>, 32> g_locks;
+	std::array<atomic_t<cpu_thread*>, 4> g_locks{};
+	std::array<atomic_t<u64>, 6> g_range_locks{};
 
 	static void _register_lock(cpu_thread* _cpu)
 	{
@@ -72,11 +76,25 @@ namespace vm
 		}
 	}
 
-	bool passive_lock(cpu_thread& cpu, bool wait)
+	static atomic_t<u64>* _register_range_lock(const u64 lock_info)
+	{
+		while (true)
+		{
+			for (auto& lock : g_range_locks)
+			{
+				if (!lock && lock.compare_and_swap_test(0, lock_info))
+				{
+					return &lock;
+				}
+			}
+		}
+	}
+
+	void passive_lock(cpu_thread& cpu)
 	{
 		if (UNLIKELY(g_tls_locked && *g_tls_locked == &cpu))
 		{
-			return true;
+			return;
 		}
 
 		if (LIKELY(g_mutex.is_lockable()))
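
_register_range_lock above claims the first free slot of the six-entry g_range_locks pool with a compare-and-swap and simply retries forever while all slots are busy. The same idea in portable C++, with std::atomic standing in for the repo's atomic_t (a sketch, not the commit's code):

    #include <array>
    #include <atomic>
    #include <cstdint>

    std::array<std::atomic<std::uint64_t>, 6> pool{}; // like g_range_locks

    std::atomic<std::uint64_t>* claim_slot(std::uint64_t lock_info)
    {
        while (true)
        {
            for (auto& slot : pool)
            {
                std::uint64_t expected = 0;

                // Cheap load as a prefilter, then try to own the slot;
                // a failed CAS just moves on to the next slot.
                if (slot.load(std::memory_order_relaxed) == 0 &&
                    slot.compare_exchange_strong(expected, lock_info))
                {
                    return &slot;
                }
            }
        }
    }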
@@ -84,31 +102,46 @@ namespace vm
 			// Optimistic path (hope that mutex is not exclusively locked)
 			_register_lock(&cpu);
 
-			if (UNLIKELY(!g_mutex.is_lockable()))
-			{
-				passive_unlock(cpu);
-
-				if (!wait)
-				{
-					return false;
-				}
-
-				::reader_lock lock(g_mutex);
-				_register_lock(&cpu);
-			}
-		}
-		else
-		{
-			if (!wait)
-			{
-				return false;
-			}
-
-			::reader_lock lock(g_mutex);
-			_register_lock(&cpu);
-		}
-
-		return true;
+			if (LIKELY(g_mutex.is_lockable()))
+			{
+				return;
+			}
+
+			passive_unlock(cpu);
+		}
+
+		::reader_lock lock(g_mutex);
+		_register_lock(&cpu);
+	}
+
+	atomic_t<u64>* passive_lock(const u32 addr, const u32 end)
+	{
+		static const auto test_addr = [](const u32 target, const u32 addr, const u32 end)
+		{
+			return addr > target || end <= target;
+		};
+
+		atomic_t<u64>* _ret;
+
+		if (LIKELY(test_addr(g_addr_lock.load(), addr, end)))
+		{
+			// Optimistic path (hope that address range is not locked)
+			_ret = _register_range_lock((u64)end << 32 | addr);
+
+			if (LIKELY(test_addr(g_addr_lock.load(), addr, end)))
+			{
+				return _ret;
+			}
+
+			*_ret = 0;
+		}
+
+		{
+			::reader_lock lock(g_mutex);
+			_ret = _register_range_lock((u64)end << 32 | addr);
+		}
+
+		return _ret;
 	}
 
 	void passive_unlock(cpu_thread& cpu)
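
This two-argument passive_lock is what lets an SPU DMA transfer take its lock only once: register the range optimistically, re-check g_addr_lock, and fall back to the reader lock only when a writer raced in. A hypothetical caller (function name invented for illustration; the release mirrors the *_ret = 0 rollback above):

    #include <cstring>

    // Sketch only, assumes rpcs3's vm.h / types.h declarations.
    void dma_write_guarded(u32 eal, void* dst, const void* src, u32 size)
    {
        // Keeps any writer_lock whose target falls in [eal, eal + size) waiting.
        atomic_t<u64>* range_lock = vm::passive_lock(eal, eal + size);

        std::memcpy(dst, src, size);

        // Free the slot for reuse by other transfers.
        *range_lock = 0;
    }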
@@ -194,8 +227,7 @@ namespace vm
 		m_upgraded = true;
 	}
 
-	writer_lock::writer_lock(int full)
-		: locked(true)
+	writer_lock::writer_lock(u32 addr)
 	{
 		auto cpu = get_current_cpu_thread();
 
@@ -206,7 +238,7 @@ namespace vm
 
 		g_mutex.lock();
 
-		if (full)
+		if (addr)
 		{
 			for (auto& lock : g_locks)
 			{
@@ -216,6 +248,30 @@ namespace vm
 				}
 			}
 
+			g_addr_lock = addr;
+
+			for (auto& lock : g_range_locks)
+			{
+				while (true)
+				{
+					const u64 value = lock;
+
+					// Test beginning address
+					if (static_cast<u32>(value) > addr)
+					{
+						break;
+					}
+
+					// Test end address
+					if (static_cast<u32>(value >> 32) <= addr)
+					{
+						break;
+					}
+
+					_mm_pause();
+				}
+			}
+
 			for (auto& lock : g_locks)
 			{
 				while (cpu_thread* ptr = lock)
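
Together with the double-check in passive_lock, this spin loop forms a store/load handshake: the writer publishes its target in g_addr_lock before polling the range slots, and a reader registers its slot before re-reading g_addr_lock, so with sequentially consistent atomics at least one side always observes the other. A condensed single-slot model with std::atomic stand-ins (illustration only):

    #include <atomic>
    #include <cstdint>
    #include <emmintrin.h> // _mm_pause, as in the diff

    std::atomic<std::uint32_t> addr_lock{0}; // like g_addr_lock
    std::atomic<std::uint64_t> slot{0};      // one g_range_locks entry

    // Writer: publish the target, then wait until the slot misses it.
    void writer_wait(std::uint32_t target)
    {
        addr_lock.store(target);

        while (true)
        {
            const std::uint64_t v = slot.load();
            const std::uint32_t lo = static_cast<std::uint32_t>(v);
            const std::uint32_t hi = static_cast<std::uint32_t>(v >> 32);

            if (lo > target || hi <= target) // a free slot (0) also breaks here
            {
                break;
            }

            _mm_pause();
        }
    }

    // Reader: claim the slot, then re-check the published target.
    bool reader_try_lock(std::uint32_t addr, std::uint32_t end)
    {
        slot.store(std::uint64_t{end} << 32 | addr);

        if (const std::uint32_t t = addr_lock.load(); addr > t || end <= t)
        {
            return true; // the writer either saw our slot or targets elsewhere
        }

        slot.store(0); // roll back, like *_ret = 0 in passive_lock
        return false;
    }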
@@ -225,7 +281,7 @@ namespace vm
 						break;
 					}
 
-					busy_wait();
+					_mm_pause();
 				}
 			}
 		}
@@ -239,10 +295,8 @@ namespace vm
 
 	writer_lock::~writer_lock()
 	{
-		if (locked)
-		{
-			g_mutex.unlock();
-		}
+		g_addr_lock.raw() = 0;
+		g_mutex.unlock();
 	}
 
 	void reservation_lock_internal(atomic_t<u64>& res)
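
The destructor now clears g_addr_lock before releasing the mutex; a plain raw() (non-atomic) store suffices there because no new conflicting range lock can be registered while g_mutex is still held exclusively. A hypothetical writer-side caller, names invented for illustration:

    #include <cstring>

    // Sketch only, assumes rpcs3's vm.h declarations.
    void store_block_exclusive(u32 addr, const void* data, u32 size)
    {
        // The constructor spins until overlapping range locks drain.
        vm::writer_lock lock(addr);

        std::memcpy(vm::base(addr), data, size);
    }   // ~writer_lock: g_addr_lock cleared first, then g_mutex released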