TSX: adjust transaction logic

Allow more in first-chance transactions.
Allow abandonment of PUTLLC as in original path.
Make PUTLLUC unconditionally shared-locked.
Give PUTLLC +1 priority (minor change).
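
Note: the arithmetic in this diff implies a particular reservation-word layout. Below is a minimal sketch with assumed values; rsrv_unique_lock and rsrv_shared_mask are real identifiers from rpcs3's vm headers, but the concrete numbers here are inferred from the +1/+64/+127 and and_(-128) operations in the hunks, not copied from the source.

    #include <cstdint>

    // Assumed bit layout (inferred, not authoritative):
    enum : std::uint64_t
    {
        rsrv_shared_mask = 63,  // bits 0..5: shared-locker count, +1 per locker
        rsrv_unique_lock = 64,  // bit 6: exclusive ("unique") lock
        rsrv_lock_mask   = 127, // all lock bits; timestamp = value & -128
    };

    constexpr std::uint64_t timestamp(std::uint64_t res) { return res & ~rsrv_lock_mask; }

    // Every successful store advances the timestamp by exactly one 128 tick
    // while clearing whatever lock value the writer held:
    static_assert(1 + 127 == 128);  // old fallback: xadd +1, then lock().add(127)
    static_assert(64 + 64 == 128);  // new fallback: cmpxchg +64, then lock().add(64)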
Nekotekina, 2020-10-28 05:57:25 +03:00
commit ff550b5c3c (parent d6daa0d05b)
2 changed files with 48 additions and 32 deletions


@@ -1378,10 +1378,14 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	// Begin transaction
 	Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4);
+	c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
+	c.mov(x86::eax, _XABORT_EXPLICIT);
+	c.jc(fall);
 	c.xbegin(tx0);
 	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
-	c.test(x86::eax, 127);
+	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(skip);
 	c.and_(x86::rax, -128);
 	c.cmp(x86::rax, x86::r13);
 	c.jne(fail);
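
Note: the hunk above hoists the cpu_flag::pause test in front of xbegin and preloads eax with _XABORT_EXPLICIT, so bailing out on pause is reported like an explicitly aborted transaction; and inside the transaction, only vm::rsrv_unique_lock now forces the skip path where any lock bit (test eax, 127) did before — the "allow more in first-chance transactions" item. A rough sketch of the new entry sequence with RTM intrinsics (helper name hypothetical):

    #include <immintrin.h> // compile with -mrtm

    // Sketch: preload the status so that a pause-triggered bail-out looks to
    // the caller exactly like an explicit transactional abort.
    inline unsigned tx_enter_unless_paused(bool paused)
    {
        unsigned status = _XABORT_EXPLICIT; // c.mov(eax, _XABORT_EXPLICIT)
        if (paused)                         // c.bt(...pause); c.jc(fall)
            return status;
        status = _xbegin();                 // _XBEGIN_STARTED on success
        return status;
    }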
@@ -1471,19 +1475,19 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	Label fall2 = c.newLabel();
 	Label fail2 = c.newLabel();
 	Label fail3 = c.newLabel();
+	Label fail4 = c.newLabel();
 
 	// Lightened transaction: only compare and swap data
 	c.bind(next);
 
 	// Try to "lock" reservation
-	c.mov(x86::eax, 1);
-	c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
-	c.test(x86::eax, vm::rsrv_unique_lock);
-	c.jnz(fall2);
-	// Allow only first shared lock to proceed
+	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
+	c.and_(x86::r13, -128);
+	c.cmp(x86::rax, x86::r13);
+	c.jne(fail2);
+	c.add(x86::r13, vm::rsrv_unique_lock);
+	c.lock().cmpxchg(x86::qword_ptr(x86::rbx), x86::r13);
+	c.jnz(next);
 
 	Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666);
 	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
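
Note: here the fallback stops taking a shared reference (lock xadd +1) and instead claims the unique lock with a cmpxchg against the expected timestamp, retrying through the next label. Roughly equivalent logic in std::atomic form, assuming the layout sketched after the commit message (function name hypothetical):

    #include <atomic>
    #include <cstdint>

    // Sketch: take the unique lock only when the word is exactly the expected
    // timestamp with no lock bits set; anything else is fail2, with nothing
    // held and nothing to release.
    bool lock_unique(std::atomic<std::uint64_t>& res, std::uint64_t rtime)
    {
        const std::uint64_t time = rtime & ~std::uint64_t{127}; // c.and_(r13, -128)
        std::uint64_t v = time;
        while (!res.compare_exchange_weak(v, time + 64))        // lock cmpxchg
        {
            if (v != time)
                return false; // c.jne(fail2): moved on, or someone holds a lock
            // otherwise retry, as the asm does via c.jnz(next)
        }
        return true;
    }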
@@ -1493,9 +1497,10 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
 	c.jc(fall2);
 	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
-	c.and_(x86::rax, -128);
+	c.test(x86::eax, vm::rsrv_shared_mask);
+	c.jnz(fall2);
 	c.cmp(x86::rax, x86::r13);
-	c.jne(fail2);
+	c.jne(fail4);
 	c.xbegin(tx1);
 
 	if (s_tsx_avx)
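
Note: with the unique lock held, the retry prologue now waits on fall2 while shared lockers drain (the vm::rsrv_shared_mask test) instead of failing, and routes a timestamp mismatch to the new fail4 exit, since the held lock must be released. A small sketch of that three-way decision (names and constants assumed as before):

    #include <cstdint>

    enum class tx_next { enter, wait, fail_release };

    // locked_time == timestamp + rsrv_unique_lock (r13 after the cmpxchg above).
    inline tx_next classify(std::uint64_t v, std::uint64_t locked_time)
    {
        if (v & 63)                       // vm::rsrv_shared_mask (assumed value)
            return tx_next::wait;         // c.jnz(fall2): let readers drain
        if (v != locked_time)
            return tx_next::fail_release; // c.jne(fail4): must drop our lock
        return tx_next::enter;            // c.xbegin(tx1)
    }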
@@ -1535,7 +1540,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	c.mov(x86::qword_ptr(x86::rbp, args[0], 1, 0), args[3]);
 	c.xend();
-	c.lock().add(x86::qword_ptr(x86::rbx), 127);
+	c.lock().add(x86::qword_ptr(x86::rbx), 64);
 	c.mov(x86::eax, x86::r12d);
 	c.jmp(_ret);
@@ -1569,8 +1574,11 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	c.mov(x86::eax, -1);
 	c.jmp(_ret);
 
+	c.bind(fail4);
+	c.lock().sub(x86::qword_ptr(x86::rbx), vm::rsrv_unique_lock);
+	//c.jmp(fail2);
 	c.bind(fail2);
-	c.lock().sub(x86::qword_ptr(x86::rbx), 1);
 	c.xor_(x86::eax, x86::eax);
 	//c.jmp(_ret);
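
Note: the failure exits mirror what each path holds. fail4 is reached only with the unique lock taken, so it subtracts vm::rsrv_unique_lock and falls through; fail2 can now be reached with nothing acquired, so its lock().sub(1) is gone. A sketch (assumed constants as above, helper name hypothetical):

    #include <atomic>
    #include <cstdint>

    // fail4: drop the unique lock we hold, then report failure as fail2 does.
    inline std::uint32_t fail4(std::atomic<std::uint64_t>& res)
    {
        res.fetch_sub(64); // c.lock().sub(..., vm::rsrv_unique_lock)
        return 0;          // fail2: c.xor_(eax, eax) — nothing else to release
    }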
@@ -1681,16 +1689,16 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 		}
 	}
 
-	return cpu_thread::suspend_all(&ppu, [&]
+	return cpu_thread::suspend_all<+1>(&ppu, [&]
 	{
 		if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
 		{
 			data.release(reg_value);
-			res += 127;
+			res += 64;
 			return true;
 		}
 
-		res -= 1;
+		res -= vm::rsrv_unique_lock;
 		return false;
 	});
 }
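
Note: in the suspend_all fallback above, the <+1> template argument matches the "+1 priority" item from the commit message, and the +64 / -rsrv_unique_lock arithmetic reflects that this path now enters holding the unique lock. A loose std::atomic sketch of the lambda's logic (types and names simplified and assumed; cmp_rdata compares the 128-byte reservation granule):

    #include <atomic>
    #include <cstdint>
    #include <cstring>

    // Sketch: res already carries our unique lock (+64) when this runs.
    bool commit(std::atomic<std::uint64_t>& res, std::uint64_t rtime,
                std::uint64_t& data, std::uint64_t reg_value,
                const void* rdata, const void* mem)
    {
        std::uint64_t v = res.load();
        if ((v & ~std::uint64_t{127}) == rtime && std::memcmp(rdata, mem, 128) == 0)
        {
            data = reg_value;   // data.release(reg_value)
            res.store(v + 64);  // held 64 + 64: one full 128 timestamp tick
            return true;
        }
        res.store(v - 64);      // res -= vm::rsrv_unique_lock: back off cleanly
        return false;
    }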