PPU: add LARX perf counter

Also refactor ppu_store_reservation a bit.
This commit is contained in:
Nekotekina 2020-10-29 21:46:50 +03:00
parent 425fce5070
commit fb24b06a5d
2 changed files with 26 additions and 20 deletions

View file

@ -854,6 +854,8 @@ ppu_thread::~ppu_thread()
{ {
dct->used -= stack_size; dct->used -= stack_size;
} }
perf_log.notice("Perf stats for STCX reload: successs %u, failure %u", last_succ, last_fail);
} }
ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached) ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached)
@ -1123,6 +1125,8 @@ static void ppu_trace(u64 addr)
template <typename T> template <typename T>
static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr) static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
{ {
perf_meter<"LARX"_u32> perf0;
// Do not allow stores accessed from the same cache line to pass reservation load // Do not allow stores accessed from the same cache line to pass reservation load
std::atomic_thread_fence(std::memory_order_seq_cst); std::atomic_thread_fence(std::memory_order_seq_cst);
@ -1354,7 +1358,6 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
c.and_(args[0].r32(), 63); c.and_(args[0].r32(), 63);
c.mov(x86::r12d, 1); c.mov(x86::r12d, 1);
c.mov(x86::r13, args[1]); c.mov(x86::r13, args[1]);
c.bswap(args[3]);
// Prepare data // Prepare data
if (s_tsx_avx) if (s_tsx_avx)
@ -1615,25 +1618,21 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
auto& data = vm::_ref<atomic_be_t<u64>>(addr & -8); auto& data = vm::_ref<atomic_be_t<u64>>(addr & -8);
auto& res = vm::reservation_acquire(addr, sizeof(T)); auto& res = vm::reservation_acquire(addr, sizeof(T));
const u64 old_data = reinterpret_cast<be_t<u64>&>(ppu.rdata[addr & 0x78]);
const u64 rtime = ppu.rtime; const u64 rtime = ppu.rtime;
be_t<u64> old_data = 0;
std::memcpy(&old_data, &ppu.rdata[addr & 0x78], sizeof(old_data));
be_t<u64> new_data = old_data;
if constexpr (sizeof(T) == sizeof(u32)) if constexpr (sizeof(T) == sizeof(u32))
{ {
// Rebuild reg_value to be 32-bits of new data and 32-bits of old data // Rebuild reg_value to be 32-bits of new data and 32-bits of old data
union bf64 const be_t<u32> reg32 = static_cast<u32>(reg_value);
{ std::memcpy(reinterpret_cast<char*>(&new_data) + (addr & 4), &reg32, sizeof(u32));
u64 all; }
bf_t<u64, 0, 32> low;
bf_t<u64, 32, 32> high;
} bf{old_data};
if (addr & 4)
bf.low = static_cast<u32>(reg_value);
else else
bf.high = static_cast<u32>(reg_value); {
new_data = reg_value;
reg_value = bf.all;
} }
// Test if store address is on the same aligned 8-byte memory as load // Test if store address is on the same aligned 8-byte memory as load
@ -1665,7 +1664,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
{ {
if (g_use_rtm) [[likely]] if (g_use_rtm) [[likely]]
{ {
switch (u32 count = ppu_stcx_accurate_tx(addr & -8, rtime, ppu.rdata, reg_value)) switch (u32 count = ppu_stcx_accurate_tx(addr & -8, rtime, ppu.rdata, std::bit_cast<u64>(new_data)))
{ {
case 0: case 0:
{ {
@ -1691,7 +1690,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
{ {
if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128))) if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
{ {
data.release(reg_value); data.release(new_data);
res += 127; res += 127;
return true; return true;
} }
@ -1736,7 +1735,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
if (cmp_rdata(ppu.rdata, super_data)) if (cmp_rdata(ppu.rdata, super_data))
{ {
data.release(reg_value); data.release(new_data);
res += 64; res += 64;
return true; return true;
} }
@ -1748,7 +1747,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
return success; return success;
} }
if (reg_value == old_data) if (new_data == old_data)
{ {
return res.compare_and_swap_test(rtime, rtime + 128); return res.compare_and_swap_test(rtime, rtime + 128);
} }
@ -1774,7 +1773,8 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
return false; return false;
} }
if (data.compare_and_swap_test(old_data, reg_value)) // Store previous value in old_data on failure
if (data.compare_exchange(old_data, new_data))
{ {
res += 127; res += 127;
return true; return true;

View file

@ -219,6 +219,12 @@ public:
// Thread name // Thread name
stx::atomic_cptr<std::string> ppu_tname; stx::atomic_cptr<std::string> ppu_tname;
u64 last_ftsc = 0;
u64 last_ftime = 0;
u32 last_faddr = 0;
u64 last_fail = 0;
u64 last_succ = 0;
be_t<u64>* get_stack_arg(s32 i, u64 align = alignof(u64)); be_t<u64>* get_stack_arg(s32 i, u64 align = alignof(u64));
void exec_task(); void exec_task();
void fast_call(u32 addr, u32 rtoc); void fast_call(u32 addr, u32 rtoc);