mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-04 22:11:26 +12:00
Fix perf_meter<> after adding thread pool.
Print and reset sorted information after emulation stop/pause. Print useless average value.
This commit is contained in:
parent
3e1344e4e4
commit
d4d5dc99f3
3 changed files with 85 additions and 17 deletions
|
@ -3,6 +3,7 @@
|
||||||
#include "Utilities/bin_patch.h"
|
#include "Utilities/bin_patch.h"
|
||||||
#include "Emu/Memory/vm.h"
|
#include "Emu/Memory/vm.h"
|
||||||
#include "Emu/System.h"
|
#include "Emu/System.h"
|
||||||
|
#include "Emu/perf_meter.hpp"
|
||||||
|
|
||||||
#include "Emu/Cell/PPUThread.h"
|
#include "Emu/Cell/PPUThread.h"
|
||||||
#include "Emu/Cell/PPUCallback.h"
|
#include "Emu/Cell/PPUCallback.h"
|
||||||
|
@ -1797,6 +1798,8 @@ void Emulator::Resume()
|
||||||
ppu_log.notice("[RESUME] Dumping instruction stats:%s", dump);
|
ppu_log.notice("[RESUME] Dumping instruction stats:%s", dump);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
perf_stat_base::report();
|
||||||
|
|
||||||
// Try to resume
|
// Try to resume
|
||||||
if (!m_state.compare_and_swap_test(system_state::paused, system_state::running))
|
if (!m_state.compare_and_swap_test(system_state::paused, system_state::running))
|
||||||
{
|
{
|
||||||
|
@ -1880,6 +1883,8 @@ void Emulator::Stop(bool restart)
|
||||||
|
|
||||||
jit_runtime::finalize();
|
jit_runtime::finalize();
|
||||||
|
|
||||||
|
perf_stat_base::report();
|
||||||
|
|
||||||
static u64 aw_refs = 0;
|
static u64 aw_refs = 0;
|
||||||
static u64 aw_colm = 0;
|
static u64 aw_colm = 0;
|
||||||
static u64 aw_colc = 0;
|
static u64 aw_colc = 0;
|
||||||
|
|
|
@ -1,11 +1,19 @@
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "perf_meter.hpp"
|
#include "perf_meter.hpp"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
void perf_stat_base::push(u64 ns[66]) noexcept
|
void perf_stat_base::push(u64 ns[66]) noexcept
|
||||||
{
|
{
|
||||||
|
if (!ns[0])
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (u32 i = 0; i < 66; i++)
|
for (u32 i = 0; i < 66; i++)
|
||||||
{
|
{
|
||||||
m_log[i] += ns[i];
|
m_log[i] += atomic_storage<u64>::exchange(ns[i], 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,7 +21,7 @@ void perf_stat_base::print(const char* name) noexcept
|
||||||
{
|
{
|
||||||
if (u64 num_total = m_log[0].load())
|
if (u64 num_total = m_log[0].load())
|
||||||
{
|
{
|
||||||
perf_log.notice("Perf stats for %s: total events: %u (total time %.4fs)", name, num_total, m_log[65].load() / 1000'000'000.);
|
perf_log.notice(u8"Perf stats for %s: total events: %u (total time %.4fs, avg %.4fµs)", name, num_total, m_log[65].load() / 1000'000'000., m_log[65].load() / 1000. / num_total);
|
||||||
|
|
||||||
for (u32 i = 0; i < 13; i++)
|
for (u32 i = 0; i < 13; i++)
|
||||||
{
|
{
|
||||||
|
@ -56,3 +64,55 @@ void perf_stat_base::print(const char* name) noexcept
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static shared_mutex s_perf_mutex;
|
||||||
|
|
||||||
|
static std::map<std::string, perf_stat_base> s_perf_acc;
|
||||||
|
|
||||||
|
static std::multimap<std::string, u64*> s_perf_sources;
|
||||||
|
|
||||||
|
void perf_stat_base::add(u64 ns[66], const char* name) noexcept
|
||||||
|
{
|
||||||
|
std::lock_guard lock(s_perf_mutex);
|
||||||
|
|
||||||
|
s_perf_sources.emplace(name, ns);
|
||||||
|
s_perf_acc[name];
|
||||||
|
}
|
||||||
|
|
||||||
|
void perf_stat_base::remove(u64 ns[66], const char* name) noexcept
|
||||||
|
{
|
||||||
|
std::lock_guard lock(s_perf_mutex);
|
||||||
|
|
||||||
|
const auto found = s_perf_sources.equal_range(name);
|
||||||
|
|
||||||
|
for (auto it = found.first; it != found.second; it++)
|
||||||
|
{
|
||||||
|
if (it->second == ns)
|
||||||
|
{
|
||||||
|
s_perf_acc[name].push(ns);
|
||||||
|
s_perf_sources.erase(it);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void perf_stat_base::report() noexcept
|
||||||
|
{
|
||||||
|
std::lock_guard lock(s_perf_mutex);
|
||||||
|
|
||||||
|
perf_log.notice("Performance report begin (%u src, %u acc):", s_perf_sources.size(), s_perf_acc.size());
|
||||||
|
|
||||||
|
for (auto& [name, ns] : s_perf_sources)
|
||||||
|
{
|
||||||
|
s_perf_acc[name].push(ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& [name, data] : s_perf_acc)
|
||||||
|
{
|
||||||
|
data.print(name.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
s_perf_acc.clear();
|
||||||
|
|
||||||
|
perf_log.notice("Performance report end.");
|
||||||
|
}
|
||||||
|
|
|
@ -31,6 +31,12 @@ protected:
|
||||||
// Accumulate values from a thread
|
// Accumulate values from a thread
|
||||||
void push(u64 ns[66]) noexcept;
|
void push(u64 ns[66]) noexcept;
|
||||||
|
|
||||||
|
// Register TLS storage for stats
|
||||||
|
static void add(u64 ns[66], const char* name) noexcept;
|
||||||
|
|
||||||
|
// Unregister TLS storage and drain its data
|
||||||
|
static void remove(u64 ns[66], const char* name) noexcept;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
perf_stat_base() noexcept = default;
|
perf_stat_base() noexcept = default;
|
||||||
|
|
||||||
|
@ -39,6 +45,9 @@ public:
|
||||||
perf_stat_base& operator =(const perf_stat_base&) = delete;
|
perf_stat_base& operator =(const perf_stat_base&) = delete;
|
||||||
|
|
||||||
~perf_stat_base() {}
|
~perf_stat_base() {}
|
||||||
|
|
||||||
|
// Collect all data, report it, and clean
|
||||||
|
static void report() noexcept;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Object that prints event length stats at the end
|
// Object that prints event length stats at the end
|
||||||
|
@ -50,26 +59,20 @@ class perf_stat final : public perf_stat_base
|
||||||
// Local non-atomic values for increments
|
// Local non-atomic values for increments
|
||||||
u64 m_log[66]{};
|
u64 m_log[66]{};
|
||||||
|
|
||||||
|
perf_stat_local() noexcept
|
||||||
|
{
|
||||||
|
perf_stat_base::add(m_log, perf_name<ShortName>.data());
|
||||||
|
}
|
||||||
|
|
||||||
~perf_stat_local()
|
~perf_stat_local()
|
||||||
{
|
{
|
||||||
// Update on thread exit
|
perf_stat_base::remove(m_log, perf_name<ShortName>.data());
|
||||||
if (m_log[0])
|
|
||||||
{
|
|
||||||
if (auto* pfs = g_fxo->get<perf_stat>())
|
|
||||||
{
|
|
||||||
pfs->perf_stat_base::push(m_log);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} g_tls_perf_stat;
|
} g_tls_perf_stat;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
~perf_stat()
|
static void push(u64 ns) noexcept
|
||||||
{
|
|
||||||
perf_stat_base::print(perf_name<ShortName>.data());
|
|
||||||
}
|
|
||||||
|
|
||||||
void push(u64 ns) noexcept
|
|
||||||
{
|
{
|
||||||
auto& data = g_tls_perf_stat.m_log;
|
auto& data = g_tls_perf_stat.m_log;
|
||||||
data[0] += ns != 0;
|
data[0] += ns != 0;
|
||||||
|
@ -178,7 +181,7 @@ public:
|
||||||
const f64 diff = (end_time - m_timestamps[0]) * 1. / utils::get_tsc_freq();
|
const f64 diff = (end_time - m_timestamps[0]) * 1. / utils::get_tsc_freq();
|
||||||
|
|
||||||
// Register perf stat in nanoseconds
|
// Register perf stat in nanoseconds
|
||||||
g_fxo->get<perf_stat<ShortName>>()->push(static_cast<u64>(diff * 1000'000'000.));
|
perf_stat<ShortName>::push(static_cast<u64>(diff * 1000'000'000.));
|
||||||
|
|
||||||
// Print in microseconds
|
// Print in microseconds
|
||||||
if (static_cast<u64>(diff * 1000'000.) >= g_cfg.core.perf_report_threshold)
|
if (static_cast<u64>(diff * 1000'000.) >= g_cfg.core.perf_report_threshold)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue