mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-04 22:11:26 +12:00
SPU Profiler preview
Add option "SPU Profiler" (disabled by default). Works only with SPU recompilers. Results are flushed on pausing.
This commit is contained in:
parent
1a9e06d3c6
commit
c69fe0f664
7 changed files with 289 additions and 1 deletions
|
@ -9,6 +9,8 @@
|
|||
#include "Emu/Cell/SPUThread.h"
|
||||
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
||||
DECLARE(cpu_thread::g_threads_created){0};
|
||||
DECLARE(cpu_thread::g_threads_deleted){0};
|
||||
|
@ -46,6 +48,196 @@ void fmt_class_string<bs_t<cpu_flag>>::format(std::string& out, u64 arg)
|
|||
format_bitset(out, arg, "[", "|", "]", &fmt_class_string<cpu_flag>::format);
|
||||
}
|
||||
|
||||
// CPU profiler thread
|
||||
struct cpu_prof
|
||||
{
|
||||
// PPU/SPU id enqueued for registration
|
||||
lf_queue<u32> registered;
|
||||
|
||||
struct sample_info
|
||||
{
|
||||
// Weak pointer to the thread
|
||||
std::weak_ptr<cpu_thread> wptr;
|
||||
|
||||
// Block occurences: name -> sample_count
|
||||
std::unordered_map<u64, u64, value_hash<u64>> freq;
|
||||
|
||||
// Total number of samples
|
||||
u64 samples = 0, idle = 0;
|
||||
|
||||
sample_info(const std::shared_ptr<cpu_thread>& ptr)
|
||||
: wptr(ptr)
|
||||
{
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
freq.clear();
|
||||
samples = 0;
|
||||
idle = 0;
|
||||
}
|
||||
|
||||
// Print info
|
||||
void print(u32 id) const
|
||||
{
|
||||
// Make reversed map: sample_count -> name
|
||||
std::multimap<u64, u64> chart;
|
||||
|
||||
for (auto& [name, count] : freq)
|
||||
{
|
||||
// Inverse bits to sort in descending order
|
||||
chart.emplace(~count, name);
|
||||
}
|
||||
|
||||
// Print results
|
||||
std::string results;
|
||||
results.reserve(5100);
|
||||
|
||||
// Fraction of non-idle samples
|
||||
const f64 busy = 1. * (samples - idle) / samples;
|
||||
|
||||
for (auto& [rcount, name] : chart)
|
||||
{
|
||||
// Get correct count value
|
||||
const u64 count = ~rcount;
|
||||
const f64 _frac = count / busy / samples;
|
||||
|
||||
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
|
||||
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
|
||||
results.resize(results.size() - 4);
|
||||
|
||||
// Print chunk address from lowest 16 bits
|
||||
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
||||
|
||||
if (results.size() >= 5000)
|
||||
{
|
||||
// Stop printing after reaching some arbitrary limit in characters
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_NOTICE(GENERAL, "Thread [0x%08x]: %u samples (%.4f%% idle):%s", id, samples, 100. * idle / samples, results);
|
||||
}
|
||||
};
|
||||
|
||||
void operator()()
|
||||
{
|
||||
std::unordered_map<u32, sample_info, value_hash<u64>> threads;
|
||||
|
||||
while (thread_ctrl::state() != thread_state::aborting)
|
||||
{
|
||||
bool flush = false;
|
||||
|
||||
// Handle registration channel
|
||||
for (u32 id : registered.pop_all())
|
||||
{
|
||||
if (id == 0)
|
||||
{
|
||||
// Handle id zero as a command to flush results
|
||||
flush = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
std::shared_ptr<cpu_thread> ptr;
|
||||
|
||||
if (id >> 24 == 1)
|
||||
{
|
||||
ptr = idm::get<named_thread<ppu_thread>>(id);
|
||||
}
|
||||
else if (id >> 24 == 2)
|
||||
{
|
||||
ptr = idm::get<named_thread<spu_thread>>(id);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(GENERAL, "Invalid Thread ID: 0x%08x", id);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ptr)
|
||||
{
|
||||
auto [found, add] = threads.try_emplace(id, ptr);
|
||||
|
||||
if (!add)
|
||||
{
|
||||
// Overwritten: print previous data
|
||||
found->second.print(id);
|
||||
found->second.reset();
|
||||
found->second.wptr = ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (threads.empty())
|
||||
{
|
||||
// Wait for messages if no work (don't waste CPU)
|
||||
registered.wait();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Sample active threads
|
||||
for (auto& [id, info] : threads)
|
||||
{
|
||||
if (auto ptr = info.wptr.lock())
|
||||
{
|
||||
// Get short function hash
|
||||
const u64 name = atomic_storage<u64>::load(ptr->block_hash);
|
||||
|
||||
// Append occurrence
|
||||
info.samples++;
|
||||
|
||||
if (!(ptr->state.load() & (cpu_flag::wait + cpu_flag::stop + cpu_flag::dbg_global_pause)))
|
||||
{
|
||||
info.freq[name]++;
|
||||
|
||||
// Append verification time to fixed common name 0000000...chunk-0x3fffc
|
||||
if ((name & 0xffff) == 0)
|
||||
info.freq[0xffff]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
info.idle++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup and print results for deleted threads
|
||||
for (auto it = threads.begin(), end = threads.end(); it != end;)
|
||||
{
|
||||
if (it->second.wptr.expired())
|
||||
it->second.print(it->first), it = threads.erase(it);
|
||||
else
|
||||
it++;
|
||||
}
|
||||
|
||||
if (flush)
|
||||
{
|
||||
LOG_SUCCESS(GENERAL, "Flushing profiling results...");
|
||||
|
||||
// Print all results and cleanup
|
||||
for (auto& [id, info] : threads)
|
||||
{
|
||||
info.print(id);
|
||||
info.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Wait, roughly for 20µs
|
||||
thread_ctrl::wait_for(20, false);
|
||||
}
|
||||
|
||||
// Print all remaining results
|
||||
for (auto& [id, info] : threads)
|
||||
{
|
||||
info.print(id);
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr auto thread_name = "CPU Profiler"sv;
|
||||
};
|
||||
|
||||
using cpu_profiler = named_thread<cpu_prof>;
|
||||
|
||||
thread_local cpu_thread* g_tls_current_cpu_thread = nullptr;
|
||||
|
||||
// For synchronizing suspend_all operation
|
||||
|
@ -91,6 +283,16 @@ void cpu_thread::operator()()
|
|||
thread_ctrl::set_native_priority(-1);
|
||||
}
|
||||
|
||||
if (id_type() == 1 && false)
|
||||
{
|
||||
g_fxo->get<cpu_profiler>()->registered.push(id);
|
||||
}
|
||||
|
||||
if (id_type() == 2 && g_cfg.core.spu_prof)
|
||||
{
|
||||
g_fxo->get<cpu_profiler>()->registered.push(id);
|
||||
}
|
||||
|
||||
// Register thread in g_cpu_array
|
||||
if (!g_cpu_array_sema.try_inc(sizeof(g_cpu_array_bits) * 8))
|
||||
{
|
||||
|
@ -418,5 +620,22 @@ void cpu_thread::stop_all() noexcept
|
|||
std::this_thread::sleep_for(10ms);
|
||||
}
|
||||
|
||||
// Workaround for remaining threads (TODO)
|
||||
std::this_thread::sleep_for(1300ms);
|
||||
|
||||
LOG_NOTICE(GENERAL, "All CPU threads have been stopped.");
|
||||
}
|
||||
|
||||
void cpu_thread::flush_profilers() noexcept
|
||||
{
|
||||
if (!g_fxo->get<cpu_profiler>())
|
||||
{
|
||||
LOG_FATAL(GENERAL, "cpu_thread::flush_profilers() has been called incorrectly." HERE);
|
||||
return;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_prof || false)
|
||||
{
|
||||
g_fxo->get<cpu_profiler>()->registered.push(0);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue