SPU/PPU: update reservation logic on TSX path transactions

Make use of lock bits in reservation counters.
On PPU, fallback to compare_and_swap instead of desperate retry.
On SPU, lighten write set on retry by 'locking' outside of the transaction.
This commit is contained in:
Nekotekina 2019-05-18 23:47:35 +03:00
parent 9abb303569
commit 7de3c410cf
2 changed files with 583 additions and 176 deletions

View file

@ -973,18 +973,26 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
ppu.raddr = addr;
u64 count = 0;
while (LIKELY(g_use_rtm))
{
ppu.rtime = vm::reservation_acquire(addr, sizeof(T)) & -128;
ppu.rdata = data;
if (LIKELY(vm::reservation_acquire(addr, sizeof(T)) == ppu.rtime))
if (LIKELY((vm::reservation_acquire(addr, sizeof(T)) & -128) == ppu.rtime))
{
if (UNLIKELY(count >= 10))
{
LOG_ERROR(PPU, "%s took too long: %u", sizeof(T) == 4 ? "LWARX" : "LDARX", count);
}
return static_cast<T>(ppu.rdata << data_off >> size_off);
}
else
{
_mm_pause();
count++;
}
}
@ -1040,7 +1048,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
return ppu_load_acquire_reservation<u64>(ppu, addr);
}
const auto ppu_stwcx_tx = build_function_asm<bool(*)(u32 raddr, u64 rtime, u64 rdata, u32 value)>([](asmjit::X86Assembler& c, auto& args)
const auto ppu_stwcx_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, u64 rdata, u32 value)>([](asmjit::X86Assembler& c, auto& args)
{
using namespace asmjit;
@ -1057,11 +1065,12 @@ const auto ppu_stwcx_tx = build_function_asm<bool(*)(u32 raddr, u64 rtime, u64 r
c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
c.bswap(args[2].r32());
c.bswap(args[3].r32());
c.mov(args[0].r32(), 5);
// Begin transaction
Label begin = build_transaction_enter(c, fall);
c.cmp(x86::qword_ptr(x86::r10), args[1]);
c.mov(x86::rax, x86::qword_ptr(x86::r10));
c.and_(x86::rax, -128);
c.cmp(x86::rax, args[1]);
c.jne(fail);
c.cmp(x86::dword_ptr(x86::r11), args[2].r32());
c.jne(fail);
@ -1071,19 +1080,12 @@ const auto ppu_stwcx_tx = build_function_asm<bool(*)(u32 raddr, u64 rtime, u64 r
c.mov(x86::eax, 1);
c.ret();
// Touch memory after transaction failure
// Return 2 after transaction failure
c.bind(fall);
c.sub(args[0].r32(), 1);
c.jz(fail);
c.sar(x86::eax, 24);
c.js(fail);
c.xor_(x86::r11, 0xf80);
c.xor_(x86::r10, 0xf80);
c.lock().add(x86::dword_ptr(x86::r11), 0);
c.lock().add(x86::qword_ptr(x86::r10), 0);
c.xor_(x86::r11, 0xf80);
c.xor_(x86::r10, 0xf80);
c.jmp(begin);
c.mov(x86::eax, 2);
c.ret();
c.bind(fail);
build_transaction_abort(c, 0xff);
@ -1104,15 +1106,43 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
if (LIKELY(g_use_rtm))
{
if (ppu_stwcx_tx(addr, ppu.rtime, old_data, reg_value))
switch (ppu_stwcx_tx(addr, ppu.rtime, old_data, reg_value))
{
case 0:
{
// Reservation lost
ppu.raddr = 0;
return false;
}
case 1:
{
vm::reservation_notifier(addr, sizeof(u32)).notify_all();
ppu.raddr = 0;
return true;
}
}
auto& res = vm::reservation_acquire(addr, sizeof(u32));
const auto [_, ok] = res.fetch_op([&](u64& reserv)
{
return (++reserv & -128) == ppu.rtime;
});
// Reservation lost
ppu.raddr = 0;
if (ok)
{
if (data.compare_and_swap_test(old_data, reg_value))
{
res += 127;
vm::reservation_notifier(addr, sizeof(u32)).notify_all();
return true;
}
res -= 1;
}
return false;
}
@ -1138,7 +1168,7 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
return result;
}
const auto ppu_stdcx_tx = build_function_asm<bool(*)(u32 raddr, u64 rtime, u64 rdata, u64 value)>([](asmjit::X86Assembler& c, auto& args)
const auto ppu_stdcx_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, u64 rdata, u64 value)>([](asmjit::X86Assembler& c, auto& args)
{
using namespace asmjit;
@ -1155,11 +1185,12 @@ const auto ppu_stdcx_tx = build_function_asm<bool(*)(u32 raddr, u64 rtime, u64 r
c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
c.bswap(args[2]);
c.bswap(args[3]);
c.mov(args[0].r32(), 5);
// Begin transaction
Label begin = build_transaction_enter(c, fall);
c.cmp(x86::qword_ptr(x86::r10), args[1]);
c.mov(x86::rax, x86::qword_ptr(x86::r10));
c.and_(x86::rax, -128);
c.cmp(x86::rax, args[1]);
c.jne(fail);
c.cmp(x86::qword_ptr(x86::r11), args[2]);
c.jne(fail);
@ -1169,19 +1200,12 @@ const auto ppu_stdcx_tx = build_function_asm<bool(*)(u32 raddr, u64 rtime, u64 r
c.mov(x86::eax, 1);
c.ret();
// Touch memory after transaction failure
// Return 2 after transaction failure
c.bind(fall);
c.sub(args[0].r32(), 1);
c.jz(fail);
c.sar(x86::eax, 24);
c.js(fail);
c.xor_(x86::r11, 0xf80);
c.xor_(x86::r10, 0xf80);
c.lock().add(x86::qword_ptr(x86::r11), 0);
c.lock().add(x86::qword_ptr(x86::r10), 0);
c.xor_(x86::r11, 0xf80);
c.xor_(x86::r10, 0xf80);
c.jmp(begin);
c.mov(x86::eax, 2);
c.ret();
c.bind(fail);
build_transaction_abort(c, 0xff);
@ -1202,15 +1226,43 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
if (LIKELY(g_use_rtm))
{
if (ppu_stdcx_tx(addr, ppu.rtime, old_data, reg_value))
switch (ppu_stdcx_tx(addr, ppu.rtime, old_data, reg_value))
{
case 0:
{
// Reservation lost
ppu.raddr = 0;
return false;
}
case 1:
{
vm::reservation_notifier(addr, sizeof(u64)).notify_all();
ppu.raddr = 0;
return true;
}
}
auto& res = vm::reservation_acquire(addr, sizeof(u64));
const auto [_, ok] = res.fetch_op([&](u64& reserv)
{
return (++reserv & -128) == ppu.rtime;
});
// Reservation lost
ppu.raddr = 0;
if (ok)
{
if (data.compare_and_swap_test(old_data, reg_value))
{
res += 127;
vm::reservation_notifier(addr, sizeof(u64)).notify_all();
return true;
}
res -= 1;
}
return false;
}