SPU/PPU reservations: Optimizations part 1

- Implement vm::reservation_trylock: optimized, non-waiting locking for reservation stores. It always fails if the reservation lock bits are set.
- Make SPU accurate GET transfers on non-TSX not modify reservation lock bits.
- Add some optimizations for reservation writes of unmodified data.
This commit is contained in:
Eladash 2020-05-08 20:41:15 +03:00 committed by Ani
parent eb5ec211c2
commit 525453794f
3 changed files with 169 additions and 94 deletions

View file

@ -1214,10 +1214,21 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value)
constexpr u64 size_off = (sizeof(T) * 8) & 63; constexpr u64 size_off = (sizeof(T) * 8) & 63;
const T old_data = static_cast<T>(ppu.rdata << ((addr & 7) * 8) >> size_off); const T old_data = static_cast<T>(ppu.rdata << ((addr & 7) * 8) >> size_off);
auto& res = vm::reservation_acquire(addr, sizeof(T));
if (ppu.raddr != addr || addr % sizeof(T) || old_data != data.load() || ppu.rtime != (vm::reservation_acquire(addr, sizeof(T)) & -128)) if (std::exchange(ppu.raddr, 0) != addr || addr % sizeof(T) || old_data != data || ppu.rtime != res)
{ {
ppu.raddr = 0; return false;
}
if (reg_value == old_data)
{
if (res.compare_and_swap_test(ppu.rtime, ppu.rtime + 128))
{
res.notify_all();
return true;
}
return false; return false;
} }
@ -1230,27 +1241,21 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value)
case 0: case 0:
{ {
// Reservation lost // Reservation lost
ppu.raddr = 0;
return false; return false;
} }
case 1: case 1:
{ {
vm::reservation_notifier(addr, sizeof(T)).notify_all(); res.notify_all();
ppu.raddr = 0;
return true; return true;
} }
} }
auto& res = vm::reservation_acquire(addr, sizeof(T)); if (res == ppu.rtime && vm::reservation_trylock(res, ppu.rtime))
ppu.raddr = 0;
if (res == ppu.rtime && res.compare_and_swap_test(ppu.rtime, ppu.rtime | 1))
{ {
if (data.compare_and_swap_test(old_data, reg_value)) if (data.compare_and_swap_test(old_data, reg_value))
{ {
res += 127; res += 127;
vm::reservation_notifier(addr, sizeof(T)).notify_all(); res.notify_all();
return true; return true;
} }
@ -1260,25 +1265,23 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value)
return false; return false;
} }
vm::passive_unlock(ppu); if (!vm::reservation_trylock(res, ppu.rtime))
{
return false;
}
auto& res = vm::reservation_lock(addr, sizeof(T)); const bool result = data.compare_and_swap_test(old_data, reg_value);
const u64 old_time = res.load() & -128;
const bool result = ppu.rtime == old_time && data.compare_and_swap_test(old_data, reg_value);
if (result) if (result)
{ {
res.release(old_time + 128); res.release(ppu.rtime + 128);
vm::reservation_notifier(addr, sizeof(T)).notify_all(); res.notify_all();
} }
else else
{ {
res.release(old_time); res.release(ppu.rtime);
} }
vm::passive_lock(ppu);
ppu.raddr = 0;
return result; return result;
} }

View file

@ -1251,6 +1251,77 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
if (!g_use_rtm && (!is_get || g_cfg.core.spu_accurate_putlluc)) [[unlikely]] if (!g_use_rtm && (!is_get || g_cfg.core.spu_accurate_putlluc)) [[unlikely]]
{ {
if (const u32 size = args.size; ((eal & 127) + size) <= 128 && is_get)
{
for (u64 i = 0;; [&]()
{
if (++i < 25) [[likely]]
{
busy_wait(300);
}
else
{
std::this_thread::yield();
}
}())
{
const u64 time0 = vm::reservation_acquire(eal, size);
if (time0 & 1)
{
continue;
}
switch (size)
{
case 1:
{
*reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
break;
}
case 2:
{
*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
break;
}
case 4:
{
*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
break;
}
case 8:
{
*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
break;
}
default:
{
auto _dst = dst;
auto _src = src;
auto _size = size;
while (_size)
{
*reinterpret_cast<v128*>(_dst) = *reinterpret_cast<const v128*>(_src);
_dst += 16;
_src += 16;
_size -= 16;
}
break;
}
}
if (time0 != vm::reservation_acquire(eal, size))
{
continue;
}
return;
}
}
switch (u32 size = args.size) switch (u32 size = args.size)
{ {
case 1: case 1:
@ -1592,7 +1663,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
if (raddr && addr == raddr) if (raddr && addr == raddr)
{ {
// Last check for event before we clear the reservation // Last check for event before we clear the reservation
if ((vm::reservation_acquire(addr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr))) if (vm::reservation_acquire(addr, 128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr)))
{ {
ch_event_stat |= SPU_EVENT_LR; ch_event_stat |= SPU_EVENT_LR;
} }
@ -1808,12 +1879,6 @@ bool spu_thread::process_mfc_cmd()
const u32 addr = ch_mfc_cmd.eal & -128; const u32 addr = ch_mfc_cmd.eal & -128;
const auto& data = vm::_ref<decltype(rdata)>(addr); const auto& data = vm::_ref<decltype(rdata)>(addr);
if (addr == raddr && !g_use_rtm && g_cfg.core.spu_loop_detection && rtime == vm::reservation_acquire(addr, 128) && cmp_rdata(rdata, data))
{
// Spinning, might as well yield cpu resources
std::this_thread::yield();
}
auto& dst = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80); auto& dst = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
u64 ntime; u64 ntime;
@ -1873,7 +1938,7 @@ bool spu_thread::process_mfc_cmd()
if (raddr && raddr != addr) if (raddr && raddr != addr)
{ {
// Last check for event before we replace the reservation with a new one // Last check for event before we replace the reservation with a new one
if ((vm::reservation_acquire(raddr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr))) if (vm::reservation_acquire(raddr, 128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
{ {
ch_event_stat |= SPU_EVENT_LR; ch_event_stat |= SPU_EVENT_LR;
} }
@ -1899,65 +1964,71 @@ bool spu_thread::process_mfc_cmd()
{ {
// Store conditionally // Store conditionally
const u32 addr = ch_mfc_cmd.eal & -128; const u32 addr = ch_mfc_cmd.eal & -128;
u32 result = 0;
if (raddr == addr) if ([&]()
{ {
if (raddr != addr)
{
return false;
}
const auto& to_write = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80); const auto& to_write = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
auto& res = vm::reservation_acquire(addr, 128);
if (!g_use_rtm && rtime != res)
{
return false;
}
if (cmp_rdata(to_write, rdata))
{
// Writeback of unchanged data. Only check memory change
return cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr)) && res.compare_and_swap_test(rtime, rtime + 128);
}
if (g_use_rtm) [[likely]] if (g_use_rtm) [[likely]]
{ {
result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()); switch (spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()))
{
if (result == 2) case 2:
{ {
result = 0;
cpu_thread::suspend_all cpu_lock(this); cpu_thread::suspend_all cpu_lock(this);
// Give up if PUTLLUC happened // Give up if PUTLLUC happened
if (vm::reservation_acquire(addr, 128) == (rtime | 1)) if (res == (rtime | 1))
{ {
auto& data = vm::_ref<decltype(rdata)>(addr); auto& data = vm::_ref<decltype(rdata)>(addr);
if ((vm::reservation_acquire(addr, 128) & -128) == rtime && cmp_rdata(rdata, data)) if (cmp_rdata(rdata, data))
{ {
mov_rdata(data, to_write); mov_rdata(data, to_write);
vm::reservation_acquire(addr, 128) += 127; res += 127;
result = 1; return true;
}
else
{
vm::reservation_acquire(addr, 128) -= 1;
} }
} }
else
{
vm::reservation_acquire(addr, 128) -= 1;
}
}
}
else if (auto& data = vm::_ref<decltype(rdata)>(addr); rtime == (vm::reservation_acquire(raddr, 128) & -128))
{
if (cmp_rdata(rdata, to_write))
{
// Writeback of unchanged data. Only check memory change
result = cmp_rdata(rdata, data) && vm::reservation_acquire(raddr, 128).compare_and_swap_test(rtime, rtime + 128);
}
else
{
auto& res = vm::reservation_lock(raddr, 128);
const u64 old_time = res.load() & -128;
if (rtime == old_time) res -= 1;
return false;
}
case 1: return true;
case 0: return false;
default: ASSUME(0);
}
}
if (!vm::reservation_trylock(res, rtime))
{ {
*reinterpret_cast<atomic_t<u32>*>(&data) += 0; return false;
}
vm::_ref<atomic_t<u32>>(addr) += 0;
const auto render = get_rsx_if_needs_res_pause(addr); const auto render = get_rsx_if_needs_res_pause(addr);
if (render) render->pause(); if (render) render->pause();
auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr); auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr);
const bool success = [&]()
{ {
// Full lock (heavyweight) // Full lock (heavyweight)
// TODO: vm::check_addr // TODO: vm::check_addr
@ -1966,26 +2037,17 @@ bool spu_thread::process_mfc_cmd()
if (cmp_rdata(rdata, super_data)) if (cmp_rdata(rdata, super_data))
{ {
mov_rdata(super_data, to_write); mov_rdata(super_data, to_write);
res.release(old_time + 128); res.release(rtime + 128);
result = 1; return true;
}
else
{
res.release(old_time);
}
} }
res.release(rtime);
return false;
}();
if (render) render->unpause(); if (render) render->unpause();
} return success;
else }())
{
res.release(old_time);
}
}
}
}
if (result)
{ {
vm::reservation_notifier(addr, 128).notify_all(); vm::reservation_notifier(addr, 128).notify_all();
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS); ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);

View file

@ -40,4 +40,14 @@ namespace vm
return res; return res;
} }
// Single-shot attempt to lock a reservation: performs exactly one
// compare-and-swap on `res`, replacing `rtime` with `rtime | 1`.
// Returns true when the swap took place and false otherwise; there is no
// retry loop and no waiting in this function.
// NOTE(review): presumably callers pass an `rtime` whose bit 0 is clear, so
// that a value which already has that bit set can never match - confirm.
inline bool reservation_trylock(atomic_t<u64>& res, u64 rtime)
{
	const u64 locked_time = rtime | 1;

	if (!res.compare_and_swap_test(rtime, locked_time)) [[unlikely]]
	{
		// `res` no longer holds `rtime`: give up immediately
		return false;
	}

	return true;
}
} // namespace vm } // namespace vm