mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-08 07:51:28 +12:00
Build transactions at runtime
Drop _xbegin family intrinsics due to bad codegen Implemented `notifier` class, replacing vm::notify Minor optimization: detach transactions from global mutex on TSX path Minor optimization: don't acquire vm::passive_lock on PPU on TSX path
This commit is contained in:
parent
fd525ae1cf
commit
367f039523
14 changed files with 529 additions and 339 deletions
|
@ -1,6 +1,7 @@
|
|||
#include "stdafx.h"
|
||||
#include "Utilities/VirtualMemory.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
#include "Utilities/JIT.h"
|
||||
#include "Crypto/sha1.h"
|
||||
#include "Emu/Memory/Memory.h"
|
||||
#include "Emu/System.h"
|
||||
|
@ -46,7 +47,6 @@
|
|||
#endif
|
||||
#include "define_new_memleakdetect.h"
|
||||
|
||||
#include "Utilities/JIT.h"
|
||||
#include "PPUTranslator.h"
|
||||
#include "Modules/cellMsgDialog.h"
|
||||
#endif
|
||||
|
@ -55,8 +55,6 @@
|
|||
#include <cfenv>
|
||||
#include "Utilities/GSL.h"
|
||||
|
||||
const bool s_use_rtm = utils::has_rtm();
|
||||
|
||||
const bool s_use_ssse3 =
|
||||
#ifdef _MSC_VER
|
||||
utils::has_ssse3();
|
||||
|
@ -713,7 +711,12 @@ ppu_thread::ppu_thread(const std::string& name, u32 prio, u32 stack)
|
|||
, m_name(name)
|
||||
{
|
||||
// Trigger the scheduler
|
||||
state += cpu_flag::suspend + cpu_flag::memory;
|
||||
state += cpu_flag::suspend;
|
||||
|
||||
if (!g_use_rtm)
|
||||
{
|
||||
state += cpu_flag::memory;
|
||||
}
|
||||
}
|
||||
|
||||
void ppu_thread::cmd_push(cmd64 cmd)
|
||||
|
@ -942,7 +945,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
|
|||
ppu.raddr = addr;
|
||||
|
||||
// Do several attemps
|
||||
for (uint i = 0; i < 5; i++)
|
||||
for (uint i = 0; g_use_rtm || i < 5; i++)
|
||||
{
|
||||
ppu.rtime = vm::reservation_acquire(addr, sizeof(T));
|
||||
_mm_lfence();
|
||||
|
@ -978,6 +981,57 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
|
|||
return ppu_load_acquire_reservation<u64>(ppu, addr);
|
||||
}
|
||||
|
||||
const auto ppu_stwcx_tx = build_function_asm<int(*)(u32 raddr, u64 rtime, u64 rdata, u32 value)>([](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
Label fall = c.newLabel();
|
||||
Label fail = c.newLabel();
|
||||
|
||||
// Prepare registers
|
||||
c.mov(x86::rax, imm_ptr(&vm::g_reservations));
|
||||
c.mov(x86::r10, x86::qword_ptr(x86::rax));
|
||||
c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
|
||||
c.mov(x86::r11, x86::qword_ptr(x86::rax));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
c.shr(args[0], 7);
|
||||
c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
|
||||
c.bswap(args[2].r32());
|
||||
c.bswap(args[3].r32());
|
||||
|
||||
// Touch memory (heavyweight)
|
||||
c.lock().add(x86::dword_ptr(x86::r11), 0);
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
c.lock().xadd(x86::qword_ptr(x86::r10), x86::rax);
|
||||
c.cmp(x86::rax, args[1]);
|
||||
c.jne(fail);
|
||||
|
||||
// Begin transaction
|
||||
build_transaction_enter(c, fall);
|
||||
c.cmp(x86::qword_ptr(x86::r10), args[1]);
|
||||
c.jne(fail);
|
||||
c.cmp(x86::dword_ptr(x86::r11), args[2].r32());
|
||||
c.jne(fail);
|
||||
c.mov(x86::dword_ptr(x86::r11), args[3].r32());
|
||||
c.rdtsc(); // destroys args[1] or args[2]
|
||||
c.shl(x86::rdx, 33);
|
||||
c.shl(x86::rax, 1);
|
||||
c.or_(x86::rax, x86::rdx);
|
||||
c.mov(x86::qword_ptr(x86::r10), x86::rax);
|
||||
c.xend();
|
||||
c.mov(x86::eax, 1);
|
||||
c.ret();
|
||||
|
||||
c.bind(fall);
|
||||
c.sar(x86::eax, 24);
|
||||
c.ret();
|
||||
|
||||
c.bind(fail);
|
||||
build_transaction_abort(c, 0xff);
|
||||
c.or_(x86::eax, -1);
|
||||
c.ret();
|
||||
});
|
||||
|
||||
extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
||||
{
|
||||
atomic_be_t<u32>& data = vm::_ref<atomic_be_t<u32>>(addr);
|
||||
|
@ -988,24 +1042,31 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
if (g_use_rtm)
|
||||
{
|
||||
if (!vm::g_mutex.is_lockable() || vm::g_mutex.is_reading())
|
||||
// Do several attempts (TODO)
|
||||
for (u32 i = 0; i < 5; i++)
|
||||
{
|
||||
_xabort(0);
|
||||
const int r = ppu_stwcx_tx(addr, ppu.rtime, ppu.rdata, reg_value);
|
||||
|
||||
if (r > 0)
|
||||
{
|
||||
vm::reservation_notifier(addr, sizeof(u32)).notify_all();
|
||||
ppu.raddr = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (r < 0)
|
||||
{
|
||||
// Reservation lost
|
||||
ppu.raddr = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), reg_value);
|
||||
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u32));
|
||||
vm::notify(addr, sizeof(u32));
|
||||
}
|
||||
|
||||
_xend();
|
||||
// Give up
|
||||
ppu.raddr = 0;
|
||||
return result;
|
||||
return false;
|
||||
}
|
||||
|
||||
vm::writer_lock lock(0);
|
||||
|
@ -1015,13 +1076,64 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
|||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u32));
|
||||
vm::notify(addr, sizeof(u32));
|
||||
vm::reservation_notifier(addr, sizeof(u32)).notify_all();
|
||||
}
|
||||
|
||||
ppu.raddr = 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
const auto ppu_stdcx_tx = build_function_asm<int(*)(u32 raddr, u64 rtime, u64 rdata, u64 value)>([](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
Label fall = c.newLabel();
|
||||
Label fail = c.newLabel();
|
||||
|
||||
// Prepare registers
|
||||
c.mov(x86::rax, imm_ptr(&vm::g_reservations));
|
||||
c.mov(x86::r10, x86::qword_ptr(x86::rax));
|
||||
c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
|
||||
c.mov(x86::r11, x86::qword_ptr(x86::rax));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
c.shr(args[0], 7);
|
||||
c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
|
||||
c.bswap(args[2]);
|
||||
c.bswap(args[3]);
|
||||
|
||||
// Touch memory (heavyweight)
|
||||
c.lock().add(x86::qword_ptr(x86::r11), 0);
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
c.lock().xadd(x86::qword_ptr(x86::r10), x86::rax);
|
||||
c.cmp(x86::rax, args[1]);
|
||||
c.jne(fail);
|
||||
|
||||
// Begin transaction
|
||||
build_transaction_enter(c, fall);
|
||||
c.cmp(x86::qword_ptr(x86::r10), args[1]);
|
||||
c.jne(fail);
|
||||
c.cmp(x86::qword_ptr(x86::r11), args[2]);
|
||||
c.jne(fail);
|
||||
c.mov(x86::qword_ptr(x86::r11), args[3]);
|
||||
c.rdtsc(); // destroys args[1] or args[2]
|
||||
c.shl(x86::rdx, 33);
|
||||
c.shl(x86::rax, 1);
|
||||
c.or_(x86::rax, x86::rdx);
|
||||
c.mov(x86::qword_ptr(x86::r10), x86::rax);
|
||||
c.xend();
|
||||
c.mov(x86::eax, 1);
|
||||
c.ret();
|
||||
|
||||
c.bind(fall);
|
||||
c.sar(x86::eax, 24);
|
||||
c.ret();
|
||||
|
||||
c.bind(fail);
|
||||
build_transaction_abort(c, 0xff);
|
||||
c.or_(x86::eax, -1);
|
||||
c.ret();
|
||||
});
|
||||
|
||||
extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
{
|
||||
atomic_be_t<u64>& data = vm::_ref<atomic_be_t<u64>>(addr);
|
||||
|
@ -1032,24 +1144,31 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
if (g_use_rtm)
|
||||
{
|
||||
if (!vm::g_mutex.is_lockable() || vm::g_mutex.is_reading())
|
||||
// Do several attempts (TODO)
|
||||
for (u32 i = 0; i < 5; i++)
|
||||
{
|
||||
_xabort(0);
|
||||
const int r = ppu_stdcx_tx(addr, ppu.rtime, ppu.rdata, reg_value);
|
||||
|
||||
if (r > 0)
|
||||
{
|
||||
vm::reservation_notifier(addr, sizeof(u64)).notify_all();
|
||||
ppu.raddr = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (r < 0)
|
||||
{
|
||||
// Reservation lost
|
||||
ppu.raddr = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value);
|
||||
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u64));
|
||||
vm::notify(addr, sizeof(u64));
|
||||
}
|
||||
|
||||
_xend();
|
||||
// Give up
|
||||
ppu.raddr = 0;
|
||||
return result;
|
||||
return false;
|
||||
}
|
||||
|
||||
vm::writer_lock lock(0);
|
||||
|
@ -1059,7 +1178,7 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
|||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u64));
|
||||
vm::notify(addr, sizeof(u64));
|
||||
vm::reservation_notifier(addr, sizeof(u64)).notify_all();
|
||||
}
|
||||
|
||||
ppu.raddr = 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue