SPU/PPU atomics performance and LR event fixes (#5435)

* Fix SPU LR event setting in atomic commands according to hardware tests
* MFC: increment timestamp for PUT cmd on the non-TSX path
* MFC: fix the reservation-lost test on the non-TSX path with regard to the lock bit
* Reservation notification moved out of writer_lock scope to reduce its lifetime
* Use passive_lock/unlock in PPU atomic instructions to reduce redundancy
* Lock only once for DMA transfers (non-TSX)
* Don't use RDTSC in reservation update logic
* Remove MFC cmd args passing to process_mfc_cmd
* Reorder check_state cpu_flag::memory check for faster unlocking
* Specialization for 128-byte data copy in SPU DMA transfers
* Implement memory range locks and isolate PPU and SPU passive lock logic
elad 2019-01-15 17:31:21 +02:00 committed by Ivan
parent f19fd23227
commit fc92ae4085
9 changed files with 344 additions and 235 deletions
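
The commit message above mentions memory range locks; in the diff below each such lock is a single 64-bit word packing the start and end of the locked range ((u64)end << 32 | addr), tested against the single currently locked address in g_addr_lock. A minimal standalone sketch of that encoding and overlap test, with illustrative names not taken from the commit:

#include <cstdint>

// Pack a byte range [addr, end) into one 64-bit lock word:
// low 32 bits = start address, high 32 bits = end address.
constexpr std::uint64_t make_range_lock(std::uint32_t addr, std::uint32_t end)
{
    return static_cast<std::uint64_t>(end) << 32 | addr;
}

// The range [addr, end) does not conflict with a locked address if it starts
// above it or ends at or below it (same check as the test_addr lambda in the diff).
constexpr bool range_is_clear(std::uint32_t locked, std::uint32_t addr, std::uint32_t end)
{
    return addr > locked || end <= locked;
}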


@@ -57,8 +57,12 @@ namespace vm
// Memory mutex acknowledgement
thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr;
// Currently locked address
atomic_t<u32> g_addr_lock = 0;
// Memory mutex: passive locks
std::array<atomic_t<cpu_thread*>, 32> g_locks;
std::array<atomic_t<cpu_thread*>, 4> g_locks{};
std::array<atomic_t<u64>, 6> g_range_locks{};
static void _register_lock(cpu_thread* _cpu)
{
@@ -72,11 +76,25 @@ namespace vm
}
}
bool passive_lock(cpu_thread& cpu, bool wait)
static atomic_t<u64>* _register_range_lock(const u64 lock_info)
{
while (true)
{
for (auto& lock : g_range_locks)
{
if (!lock && lock.compare_and_swap_test(0, lock_info))
{
return &lock;
}
}
}
}
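
_register_range_lock spins until it claims one of the six g_range_locks slots with a compare-and-swap; the slot is later freed by storing 0 back into it (see *_ret = 0 further down). A standalone sketch of the same slot-acquisition pattern using plain std::atomic, with illustrative names:

#include <array>
#include <atomic>
#include <cstdint>

std::array<std::atomic<std::uint64_t>, 6> range_locks{};

// Spin until a free slot (value 0) can be claimed with the packed range word.
std::atomic<std::uint64_t>* register_range_lock(std::uint64_t lock_info)
{
    while (true)
    {
        for (auto& lock : range_locks)
        {
            std::uint64_t expected = 0;
            if (lock.load(std::memory_order_relaxed) == 0 &&
                lock.compare_exchange_strong(expected, lock_info))
            {
                return &lock;
            }
        }
    }
}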
void passive_lock(cpu_thread& cpu)
{
if (UNLIKELY(g_tls_locked && *g_tls_locked == &cpu))
{
return true;
return;
}
if (LIKELY(g_mutex.is_lockable()))
@@ -84,31 +102,46 @@ namespace vm
// Optimistic path (hope that mutex is not exclusively locked)
_register_lock(&cpu);
if (UNLIKELY(!g_mutex.is_lockable()))
if (LIKELY(g_mutex.is_lockable()))
{
passive_unlock(cpu);
if (!wait)
{
return false;
}
::reader_lock lock(g_mutex);
_register_lock(&cpu);
return;
}
passive_unlock(cpu);
}
else
::reader_lock lock(g_mutex);
_register_lock(&cpu);
}
atomic_t<u64>* passive_lock(const u32 addr, const u32 end)
{
static const auto test_addr = [](const u32 target, const u32 addr, const u32 end)
{
if (!wait)
return addr > target || end <= target;
};
atomic_t<u64>* _ret;
if (LIKELY(test_addr(g_addr_lock.load(), addr, end)))
{
// Optimistic path (hope that address range is not locked)
_ret = _register_range_lock((u64)end << 32 | addr);
if (LIKELY(test_addr(g_addr_lock.load(), addr, end)))
{
return false;
return _ret;
}
::reader_lock lock(g_mutex);
_register_lock(&cpu);
*_ret = 0;
}
return true;
{
::reader_lock lock(g_mutex);
_ret = _register_range_lock((u64)end << 32 | addr);
}
return _ret;
}
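
This overload registers the range first, then re-checks g_addr_lock; if a writer locked a conflicting address in between, the slot is released and the registration retried under reader_lock. A hedged sketch of how a caller (for example an SPU-side DMA write on the non-TSX path) could use it, assuming the vm::passive_lock / vm::base / atomic_t API visible in this diff; the dma_write wrapper itself is illustrative, not from the commit:

#include <cstring>

// u32, u64, atomic_t and the vm:: functions come from the emulator headers.
// Lock the destination range once for the whole transfer, copy, then free the slot.
void dma_write(u32 eal, const void* src, u32 size)
{
    atomic_t<u64>* range_lock = vm::passive_lock(eal, eal + size); // covers [eal, eal + size)
    std::memcpy(vm::base(eal), src, size);                         // perform the transfer
    range_lock->release(0);                                        // drop the range lock
}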
void passive_unlock(cpu_thread& cpu)
@@ -194,8 +227,7 @@ namespace vm
m_upgraded = true;
}
writer_lock::writer_lock(int full)
: locked(true)
writer_lock::writer_lock(u32 addr)
{
auto cpu = get_current_cpu_thread();
@@ -206,7 +238,7 @@ namespace vm
g_mutex.lock();
if (full)
if (addr)
{
for (auto& lock : g_locks)
{
@@ -216,6 +248,30 @@ namespace vm
}
}
g_addr_lock = addr;
for (auto& lock : g_range_locks)
{
while (true)
{
const u64 value = lock;
// Test beginning address
if (static_cast<u32>(value) > addr)
{
break;
}
// Test end address
if (static_cast<u32>(value >> 32) <= addr)
{
break;
}
_mm_pause();
}
}
for (auto& lock : g_locks)
{
while (cpu_thread* ptr = lock)
@@ -225,7 +281,7 @@ namespace vm
break;
}
busy_wait();
_mm_pause();
}
}
}
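
With a non-zero addr, the constructor publishes the address in g_addr_lock first and then spins until every conflicting range lock (and, in the following loop, every registered passive thread) has drained. A minimal standalone sketch of the range-lock half of that writer-side handshake, using plain std::atomic and illustrative names:

#include <array>
#include <atomic>
#include <cstdint>
#include <emmintrin.h> // _mm_pause

std::atomic<std::uint32_t> addr_lock{0};
std::array<std::atomic<std::uint64_t>, 6> range_locks{}; // mirrors g_range_locks

void writer_acquire(std::uint32_t addr)
{
    // 1) Publish the locked address so new passive_lock(addr, end) callers back off.
    addr_lock.store(addr);

    // 2) Drain readers that already hold a conflicting range
    //    (a free slot reads as 0, which passes the "ends at or before addr" test).
    for (auto& lock : range_locks)
    {
        while (true)
        {
            const std::uint64_t value = lock.load();

            if (static_cast<std::uint32_t>(value) > addr ||       // range begins after addr
                static_cast<std::uint32_t>(value >> 32) <= addr)  // range ends at or before addr
            {
                break;
            }

            _mm_pause();
        }
    }
}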
@@ -239,10 +295,8 @@ namespace vm
writer_lock::~writer_lock()
{
if (locked)
{
g_mutex.unlock();
}
g_addr_lock.raw() = 0;
g_mutex.unlock();
}
void reservation_lock_internal(atomic_t<u64>& res)