SPU/PPU reservations: Optimizations part 1

- Implement vm::reservation_trylock: optimized locking for reservation stores that never waits. It always fails if the reservation lock bits are set.
- Make accurate SPU GET transfers on non-TSX no longer modify the reservation lock bits.
- Add some optimizations for reservation writes of unmodified data.
This commit is contained in:
Eladash 2020-05-08 20:41:15 +03:00 committed by Ani
parent eb5ec211c2
commit 525453794f
3 changed files with 169 additions and 94 deletions

View file

@ -1214,10 +1214,21 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value)
constexpr u64 size_off = (sizeof(T) * 8) & 63;
const T old_data = static_cast<T>(ppu.rdata << ((addr & 7) * 8) >> size_off);
auto& res = vm::reservation_acquire(addr, sizeof(T));
if (ppu.raddr != addr || addr % sizeof(T) || old_data != data.load() || ppu.rtime != (vm::reservation_acquire(addr, sizeof(T)) & -128))
if (std::exchange(ppu.raddr, 0) != addr || addr % sizeof(T) || old_data != data || ppu.rtime != res)
{
ppu.raddr = 0;
return false;
}
if (reg_value == old_data)
{
if (res.compare_and_swap_test(ppu.rtime, ppu.rtime + 128))
{
res.notify_all();
return true;
}
return false;
}
@ -1230,27 +1241,21 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value)
case 0:
{
// Reservation lost
ppu.raddr = 0;
return false;
}
case 1:
{
vm::reservation_notifier(addr, sizeof(T)).notify_all();
ppu.raddr = 0;
res.notify_all();
return true;
}
}
auto& res = vm::reservation_acquire(addr, sizeof(T));
ppu.raddr = 0;
if (res == ppu.rtime && res.compare_and_swap_test(ppu.rtime, ppu.rtime | 1))
if (res == ppu.rtime && vm::reservation_trylock(res, ppu.rtime))
{
if (data.compare_and_swap_test(old_data, reg_value))
{
res += 127;
vm::reservation_notifier(addr, sizeof(T)).notify_all();
res.notify_all();
return true;
}
@ -1260,25 +1265,23 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, T reg_value)
return false;
}
vm::passive_unlock(ppu);
if (!vm::reservation_trylock(res, ppu.rtime))
{
return false;
}
auto& res = vm::reservation_lock(addr, sizeof(T));
const u64 old_time = res.load() & -128;
const bool result = ppu.rtime == old_time && data.compare_and_swap_test(old_data, reg_value);
const bool result = data.compare_and_swap_test(old_data, reg_value);
if (result)
{
res.release(old_time + 128);
vm::reservation_notifier(addr, sizeof(T)).notify_all();
res.release(ppu.rtime + 128);
res.notify_all();
}
else
{
res.release(old_time);
res.release(ppu.rtime);
}
vm::passive_lock(ppu);
ppu.raddr = 0;
return result;
}

View file

@ -1251,6 +1251,77 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
if (!g_use_rtm && (!is_get || g_cfg.core.spu_accurate_putlluc)) [[unlikely]]
{
if (const u32 size = args.size; ((eal & 127) + size) <= 128 && is_get)
{
for (u64 i = 0;; [&]()
{
if (++i < 25) [[likely]]
{
busy_wait(300);
}
else
{
std::this_thread::yield();
}
}())
{
const u64 time0 = vm::reservation_acquire(eal, size);
if (time0 & 1)
{
continue;
}
switch (size)
{
case 1:
{
*reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
break;
}
case 2:
{
*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
break;
}
case 4:
{
*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
break;
}
case 8:
{
*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
break;
}
default:
{
auto _dst = dst;
auto _src = src;
auto _size = size;
while (_size)
{
*reinterpret_cast<v128*>(_dst) = *reinterpret_cast<const v128*>(_src);
_dst += 16;
_src += 16;
_size -= 16;
}
break;
}
}
if (time0 != vm::reservation_acquire(eal, size))
{
continue;
}
return;
}
}
switch (u32 size = args.size)
{
case 1:
@ -1592,7 +1663,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
if (raddr && addr == raddr)
{
// Last check for event before we clear the reservation
if ((vm::reservation_acquire(addr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr)))
if (vm::reservation_acquire(addr, 128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr)))
{
ch_event_stat |= SPU_EVENT_LR;
}
@ -1808,12 +1879,6 @@ bool spu_thread::process_mfc_cmd()
const u32 addr = ch_mfc_cmd.eal & -128;
const auto& data = vm::_ref<decltype(rdata)>(addr);
if (addr == raddr && !g_use_rtm && g_cfg.core.spu_loop_detection && rtime == vm::reservation_acquire(addr, 128) && cmp_rdata(rdata, data))
{
// Spinning, might as well yield cpu resources
std::this_thread::yield();
}
auto& dst = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
u64 ntime;
@ -1873,7 +1938,7 @@ bool spu_thread::process_mfc_cmd()
if (raddr && raddr != addr)
{
// Last check for event before we replace the reservation with a new one
if ((vm::reservation_acquire(raddr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
if (vm::reservation_acquire(raddr, 128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
{
ch_event_stat |= SPU_EVENT_LR;
}
@ -1899,93 +1964,90 @@ bool spu_thread::process_mfc_cmd()
{
// Store conditionally
const u32 addr = ch_mfc_cmd.eal & -128;
u32 result = 0;
if (raddr == addr)
if ([&]()
{
if (raddr != addr)
{
return false;
}
const auto& to_write = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
auto& res = vm::reservation_acquire(addr, 128);
if (!g_use_rtm && rtime != res)
{
return false;
}
if (cmp_rdata(to_write, rdata))
{
// Writeback of unchanged data. Only check memory change
return cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr)) && res.compare_and_swap_test(rtime, rtime + 128);
}
if (g_use_rtm) [[likely]]
{
result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data());
if (result == 2)
switch (spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()))
{
case 2:
{
result = 0;
cpu_thread::suspend_all cpu_lock(this);
// Give up if PUTLLUC happened
if (vm::reservation_acquire(addr, 128) == (rtime | 1))
if (res == (rtime | 1))
{
auto& data = vm::_ref<decltype(rdata)>(addr);
if ((vm::reservation_acquire(addr, 128) & -128) == rtime && cmp_rdata(rdata, data))
if (cmp_rdata(rdata, data))
{
mov_rdata(data, to_write);
vm::reservation_acquire(addr, 128) += 127;
result = 1;
}
else
{
vm::reservation_acquire(addr, 128) -= 1;
res += 127;
return true;
}
}
else
{
vm::reservation_acquire(addr, 128) -= 1;
}
res -= 1;
return false;
}
case 1: return true;
case 0: return false;
default: ASSUME(0);
}
}
else if (auto& data = vm::_ref<decltype(rdata)>(addr); rtime == (vm::reservation_acquire(raddr, 128) & -128))
if (!vm::reservation_trylock(res, rtime))
{
if (cmp_rdata(rdata, to_write))
{
// Writeback of unchanged data. Only check memory change
result = cmp_rdata(rdata, data) && vm::reservation_acquire(raddr, 128).compare_and_swap_test(rtime, rtime + 128);
}
else
{
auto& res = vm::reservation_lock(raddr, 128);
const u64 old_time = res.load() & -128;
if (rtime == old_time)
{
*reinterpret_cast<atomic_t<u32>*>(&data) += 0;
const auto render = get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr);
{
// Full lock (heavyweight)
// TODO: vm::check_addr
vm::writer_lock lock(addr);
if (cmp_rdata(rdata, super_data))
{
mov_rdata(super_data, to_write);
res.release(old_time + 128);
result = 1;
}
else
{
res.release(old_time);
}
}
if (render) render->unpause();
}
else
{
res.release(old_time);
}
}
return false;
}
}
if (result)
vm::_ref<atomic_t<u32>>(addr) += 0;
const auto render = get_rsx_if_needs_res_pause(addr);
if (render) render->pause();
auto& super_data = *vm::get_super_ptr<decltype(rdata)>(addr);
const bool success = [&]()
{
// Full lock (heavyweight)
// TODO: vm::check_addr
vm::writer_lock lock(addr);
if (cmp_rdata(rdata, super_data))
{
mov_rdata(super_data, to_write);
res.release(rtime + 128);
return true;
}
res.release(rtime);
return false;
}();
if (render) render->unpause();
return success;
}())
{
vm::reservation_notifier(addr, 128).notify_all();
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);

View file

@ -40,4 +40,14 @@ namespace vm
return res;
}
// Single-attempt lock of a reservation, with no waiting or retrying.
// Issues one compare_and_swap_test on `res` with expected value `rtime`
// and desired value `rtime | 1`, and reports that call's outcome.
//   res   - reservation word to operate on
//   rtime - reservation value the caller expects `res` to hold
// Returns true when compare_and_swap_test reports success, false otherwise.
// NOTE(review): presumably bit 0 acts as the lock bit and the caller is
// responsible for releasing it afterwards - confirm against the callers.
inline bool reservation_trylock(atomic_t<u64>& res, u64 rtime)
{
	// Failure is the uncommon outcome; bail out early in that case.
	if (!res.compare_and_swap_test(rtime, rtime | 1)) [[unlikely]]
	{
		return false;
	}

	return true;
}
} // namespace vm