rpcs3/rpcs3/Emu/CPU/CPUThread.cpp

#include "stdafx.h"
#include "CPUThread.h"
#include "Emu/System.h"
#include "Emu/system_config.h"
#include "Emu/Memory/vm_locking.h"
#include "Emu/IdManager.h"
#include "Emu/GDB.h"
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/perf_meter.hpp"
#include <thread>
#include <unordered_map>
#include <numeric>
#include <map>
DECLARE(cpu_thread::g_threads_created){0};
DECLARE(cpu_thread::g_threads_deleted){0};
DECLARE(cpu_thread::g_suspend_counter){0};
LOG_CHANNEL(profiler);
LOG_CHANNEL(sys_log, "SYS");
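// This thread's slot index in cpu_counter::cpu_array (umax while unregistered)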
static thread_local u64 s_tls_thread_slot = -1;
extern thread_local void(*g_tls_log_control)(const char* fmt, u64 progress);
template <>
void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
{
format_enum(out, arg, [](cpu_flag f)
{
switch (f)
{
case cpu_flag::stop: return "STOP";
case cpu_flag::exit: return "EXIT";
case cpu_flag::wait: return "w";
case cpu_flag::temp: return "t";
case cpu_flag::pause: return "p";
case cpu_flag::suspend: return "s";
case cpu_flag::ret: return "ret";
case cpu_flag::signal: return "sig";
case cpu_flag::memory: return "mem";
case cpu_flag::dbg_global_pause: return "G-PAUSE";
case cpu_flag::dbg_global_stop: return "G-EXIT";
case cpu_flag::dbg_pause: return "PAUSE";
case cpu_flag::dbg_step: return "STEP";
case cpu_flag::__bitset_enum_max: break;
}
return unknown;
});
}
template<>
void fmt_class_string<bs_t<cpu_flag>>::format(std::string& out, u64 arg)
{
format_bitset(out, arg, "[", "|", "]", &fmt_class_string<cpu_flag>::format);
}
// CPU profiler thread
struct cpu_prof
{
// PPU/SPU id enqueued for registration (id 0 requests a flush of accumulated results)
lf_queue<u32> registered;
struct sample_info
{
// Weak pointer to the thread
std::weak_ptr<cpu_thread> wptr;
// Block occurrences: name -> sample_count
std::unordered_map<u64, u64, value_hash<u64>> freq;
// Total number of samples
u64 samples = 0, idle = 0;
sample_info(const std::shared_ptr<cpu_thread>& ptr)
: wptr(ptr)
{
}
void reset()
{
freq.clear();
samples = 0;
idle = 0;
}
// Print info
void print(u32 id) const
{
// Make reversed map: sample_count -> name
std::multimap<u64, u64, std::greater<u64>> chart;
for (auto& [name, count] : freq)
{
chart.emplace(count, name);
}
// Print results
std::string results;
results.reserve(5100);
// Fraction of non-idle samples
const f64 busy = 1. * (samples - idle) / samples;
for (auto& [count, name] : chart)
{
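// Note: count / busy / samples simplifies to count / (samples - idle),
// i.e. the share of non-idle samples attributed to this block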
const f64 _frac = count / busy / samples;
// Print only 7 hash characters out of 11 (which covers roughly 41 bits)
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
results.resize(results.size() - 4);
// Print chunk address from lowest 16 bits
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
if (results.size() >= 5000)
{
// Stop printing after reaching some arbitrary limit in characters
break;
}
}
profiler.notice("Thread [0x%08x]: %u samples (%.4f%% idle):%s", id, samples, 100. * idle / samples, results);
}
};
void operator()()
{
std::unordered_map<u32, sample_info, value_hash<u64>> threads;
while (thread_ctrl::state() != thread_state::aborting)
{
bool flush = false;
// Handle registration channel
for (u32 id : registered.pop_all())
{
if (id == 0)
{
// Handle id zero as a command to flush results
flush = true;
continue;
}
std::shared_ptr<cpu_thread> ptr;
if (id >> 24 == 1)
{
ptr = idm::get<named_thread<ppu_thread>>(id);
}
else if (id >> 24 == 2)
{
ptr = idm::get<named_thread<spu_thread>>(id);
}
else
{
profiler.error("Invalid Thread ID: 0x%08x", id);
continue;
}
if (ptr)
{
auto [found, add] = threads.try_emplace(id, ptr);
if (!add)
{
// Overwritten: print previous data
found->second.print(id);
found->second.reset();
found->second.wptr = ptr;
}
}
}
if (threads.empty())
{
// Wait for messages if no work (don't waste CPU)
registered.wait();
continue;
}
// Sample active threads
for (auto& [id, info] : threads)
{
if (auto ptr = info.wptr.lock())
{
// Get short function hash
const u64 name = atomic_storage<u64>::load(ptr->block_hash);
// Append occurrence
info.samples++;
if (!(ptr->state.load() & (cpu_flag::wait + cpu_flag::stop + cpu_flag::dbg_global_pause)))
{
info.freq[name]++;
// Append verification time to fixed common name 0000000...chunk-0x3fffc
if ((name & 0xffff) == 0)
info.freq[0xffff]++;
}
else
{
info.idle++;
}
}
}
// Cleanup and print results for deleted threads
for (auto it = threads.begin(), end = threads.end(); it != end;)
{
if (it->second.wptr.expired())
it->second.print(it->first), it = threads.erase(it);
else
it++;
}
if (flush)
{
profiler.success("Flushing profiling results...");
// Print all results and cleanup
for (auto& [id, info] : threads)
{
info.print(id);
info.reset();
}
}
// Wait roughly 20µs before the next sampling pass
thread_ctrl::wait_for(20, false);
}
// Print all remaining results
for (auto& [id, info] : threads)
{
info.print(id);
}
}
static constexpr auto thread_name = "CPU Profiler"sv;
};
using cpu_profiler = named_thread<cpu_prof>;
thread_local cpu_thread* g_tls_current_cpu_thread = nullptr;
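// Registry of all running cpu_thread objects; backs thread enumeration and the suspend_all machinery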
struct cpu_counter
{
// For synchronizing suspend_all operation
alignas(64) shared_mutex cpu_suspend_lock;
// Workload linked list
alignas(64) atomic_t<cpu_thread::suspend_work*> cpu_suspend_work{};
// Semaphore for global thread array (global counter)
alignas(64) atomic_t<u32> cpu_array_sema{0};
// Semaphore subdivision for each array slot (64 x N in total)
alignas(64) atomic_t<u64> cpu_array_bits[3]{};
// Copy of array bits for internal use
alignas(64) u64 cpu_copy_bits[3]{};
// All registered threads
atomic_t<cpu_thread*> cpu_array[sizeof(cpu_array_bits) * 8]{};
u64 add(cpu_thread* _this, bool restore = false) noexcept
{
u64 array_slot = -1;
if (!restore && !cpu_array_sema.try_inc(sizeof(cpu_counter::cpu_array_bits) * 8))
{
sys_log.fatal("Too many threads.");
return array_slot;
}
for (u32 i = 0;; i = (i + 1) % ::size32(cpu_array_bits))
{
const auto [bits, ok] = cpu_array_bits[i].fetch_op([](u64& bits) -> u64
{
if (~bits) [[likely]]
{
// Set lowest clear bit
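// (e.g. 0b0111 | (0b0111 + 1) = 0b1111 -- only the lowest zero bit changes)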
bits |= bits + 1;
return true;
}
return false;
});
if (ok) [[likely]]
{
// Get actual slot number
array_slot = i * 64 + std::countr_one(bits);
// Register thread
if (cpu_array[array_slot].compare_and_swap_test(nullptr, _this)) [[likely]]
{
break;
}
sys_log.fatal("Unexpected slot registration failure (%u).", array_slot);
cpu_array_bits[array_slot / 64] &= ~(1ull << (array_slot % 64));
continue;
}
}
if (!restore)
{
// First time (thread created)
_this->state += cpu_flag::wait;
cpu_suspend_lock.lock_unlock();
}
return array_slot;
}
void remove(cpu_thread* _this, u64 slot) noexcept
{
// Unregister and wait if necessary
_this->state += cpu_flag::wait;
std::lock_guard lock(cpu_suspend_lock);
if (!cpu_array[slot].compare_and_swap_test(_this, nullptr))
{
sys_log.fatal("Inconsistency for array slot %u", slot);
return;
}
cpu_array_bits[slot / 64] &= ~(1ull << (slot % 64));
cpu_array_sema--;
}
// Remove temporarily
void remove(cpu_thread* _this) noexcept
{
// Unregister temporarily (called from check_state)
const u64 index = s_tls_thread_slot;
if (index >= std::size(cpu_array))
{
sys_log.fatal("Index out of bounds (%u).", index);
return;
}
if (cpu_array[index].load() == _this && cpu_array[index].compare_and_swap_test(_this, nullptr))
{
cpu_array_bits[index / 64] &= ~(1ull << (index % 64));
return;
}
sys_log.fatal("Thread not found in cpu_array (%s).", _this->get_name());
}
};
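// Iterate over every registered cpu_thread; UseCopy selects the snapshot in cpu_copy_bits instead of the live bit array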
template <bool UseCopy = false, typename F>
void for_all_cpu(F func) noexcept
{
const auto ctr = g_fxo->get<cpu_counter>();
for (u32 i = 0; i < ::size32(ctr->cpu_array_bits); i++)
{
for (u64 bits = (UseCopy ? ctr->cpu_copy_bits[i] : ctr->cpu_array_bits[i].load()); bits; bits &= bits - 1)
{
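// bits &= bits - 1 in the loop header clears the lowest set bit, so each occupied slot is visited exactly once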
const u64 index = i * 64 + std::countr_zero(bits);
if (cpu_thread* cpu = ctr->cpu_array[index].load())
{
if constexpr (std::is_invocable_v<F, cpu_thread*, u64>)
{
func(cpu, index);
continue;
}
if constexpr (std::is_invocable_v<F, cpu_thread*>)
{
func(cpu);
continue;
}
}
}
}
}
void cpu_thread::operator()()
{
g_tls_current_cpu_thread = this;
if (g_cfg.core.thread_scheduler_enabled)
{
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(id_type() == 1 ? thread_class::ppu : thread_class::spu));
}
if (id_type() == 2)
{
if (g_cfg.core.lower_spu_priority)
{
thread_ctrl::set_native_priority(-1);
}
// Force input/output denormals to zero for SPU threads (FTZ/DAZ)
_mm_setcsr(_mm_getcsr() | 0x8040);
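// 0x8040 sets the FTZ (bit 15) and DAZ (bit 6) bits of MXCSR.
// Verify it took effect: 0x1fc00000 is 1.5 * 2^-64 as a float; its square (~1.125 * 2^-127)
// is denormal and must therefore be flushed to zero.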
const volatile int a = 0x1fc00000;
__m128 b = _mm_castsi128_ps(_mm_set1_epi32(a));
int c = _mm_cvtsi128_si32(_mm_castps_si128(_mm_mul_ps(b,b)));
if (c != 0)
{
sys_log.fatal("Could not disable denormals.");
}
}
while (!g_fxo->get<cpu_counter>() && !g_fxo->get<cpu_profiler>())
{
// A small race is possible here: the first thread may start concurrently with g_fxo->init()
std::this_thread::sleep_for(1ms);
}
switch (id_type())
{
case 1:
{
//g_fxo->get<cpu_profiler>()->registered.push(id);
break;
}
case 2:
{
if (g_cfg.core.spu_prof)
{
g_fxo->get<cpu_profiler>()->registered.push(id);
}
break;
}
default: break;
}
// Register thread in g_cpu_array
s_tls_thread_slot = g_fxo->get<cpu_counter>()->add(this);
if (s_tls_thread_slot == umax)
{
return;
}
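// Raise the wait flag (plus temp, so check_state() can't return true) for the duration of an atomic
// wait/notify operation, then clear it via check_state() once the operation completes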
atomic_storage_futex::set_notify_callback([](const void*, u64 progress)
{
static thread_local bool wait_set = false;
cpu_thread* _cpu = get_current_cpu_thread();
// Wait flag isn't set asynchronously so this should be thread-safe
if (progress == 0 && !(_cpu->state & cpu_flag::wait))
{
// Operation just started and syscall is imminent
_cpu->state += cpu_flag::wait + cpu_flag::temp;
wait_set = true;
return;
}
if (progress == umax && std::exchange(wait_set, false))
{
// Operation finished: need to clean wait flag
verify(HERE), !_cpu->check_state();
return;
}
});
g_tls_log_control = [](const char* fmt, u64 progress)
{
static thread_local bool wait_set = false;
cpu_thread* _cpu = get_current_cpu_thread();
if (progress == 0 && !(_cpu->state & cpu_flag::wait))
{
_cpu->state += cpu_flag::wait + cpu_flag::temp;
wait_set = true;
return;
}
if (progress == umax && std::exchange(wait_set, false))
{
verify(HERE), !_cpu->check_state();
return;
}
};
static thread_local struct thread_cleanup_t
{
cpu_thread* _this;
std::string name;
thread_cleanup_t(cpu_thread* _this)
: _this(_this)
, name(thread_ctrl::get_name())
{
}
void cleanup()
{
if (_this == nullptr)
{
return;
}
if (auto ptr = vm::g_tls_locked)
{
ptr->compare_and_swap(_this, nullptr);
}
atomic_storage_futex::set_notify_callback(nullptr);
g_tls_log_control = [](const char*, u64){};
g_fxo->get<cpu_counter>()->remove(_this, s_tls_thread_slot);
_this = nullptr;
}
~thread_cleanup_t()
{
if (_this)
{
sys_log.warning("CPU Thread '%s' terminated abnormally:\n%s", name, _this->dump_all());
cleanup();
}
}
} cleanup{this};
// Check thread status
while (!(state & (cpu_flag::exit + cpu_flag::dbg_global_stop)) && thread_ctrl::state() != thread_state::aborting)
{
// Check stop status
if (!(state & cpu_flag::stop))
{
cpu_task();
if (state & cpu_flag::ret && state.test_and_reset(cpu_flag::ret))
{
cpu_return();
}
continue;
}
thread_ctrl::wait();
if (state & cpu_flag::ret && state.test_and_reset(cpu_flag::ret))
{
cpu_return();
}
}
// Complete cleanup gracefully
cleanup.cleanup();
}
cpu_thread::~cpu_thread()
{
vm::cleanup_unlock(*this);
g_threads_deleted++;
}
cpu_thread::cpu_thread(u32 id)
: id(id)
{
g_threads_created++;
}
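// Process pending state flags: blocks while pause/suspend/memory/debug-pause flags are set,
// and returns true when the thread must stop, return or exit (false to continue execution)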
bool cpu_thread::check_state() noexcept
{
if (state & cpu_flag::dbg_pause)
{
g_fxo->get<gdb_server>()->pause_from(this);
}
bool cpu_sleep_called = false;
bool cpu_can_stop = true;
bool escape, retval;
u64 susp_ctr = -1;
while (true)
{
// Process all flags in a single atomic op
const auto state0 = state.fetch_op([&](bs_t<cpu_flag>& flags)
{
bool store = false;
if (flags & cpu_flag::pause && !(flags & cpu_flag::wait))
{
// Save value before state is saved and cpu_flag::wait is observed
susp_ctr = g_suspend_counter;
}
if (flags & cpu_flag::temp) [[unlikely]]
{
// Sticky flag, indicates check_state() is not allowed to return true
flags -= cpu_flag::temp;
flags -= cpu_flag::wait;
cpu_can_stop = false;
store = true;
}
if (flags & cpu_flag::signal)
{
flags -= cpu_flag::signal;
cpu_sleep_called = false;
store = true;
}
// Atomically clean wait flag and escape
if (!(flags & (cpu_flag::exit + cpu_flag::dbg_global_stop + cpu_flag::ret + cpu_flag::stop)))
{
// Check pause flags which hold thread inside check_state
if (flags & (cpu_flag::pause + cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause + cpu_flag::memory))
{
if (!(flags & cpu_flag::wait))
{
flags += cpu_flag::wait;
store = true;
}
escape = false;
return store;
}
if (flags & cpu_flag::wait)
{
flags -= cpu_flag::wait;
store = true;
}
retval = false;
}
else
{
if (cpu_can_stop && !(flags & cpu_flag::wait))
{
flags += cpu_flag::wait;
store = true;
}
retval = cpu_can_stop;
}
if (cpu_can_stop && flags & cpu_flag::dbg_step)
{
// Can't process dbg_step if we only paused temporarily
flags += cpu_flag::dbg_pause;
flags -= cpu_flag::dbg_step;
store = true;
}
escape = true;
return store;
}).first;
if (escape)
{
if (s_tls_thread_slot == umax)
{
// Restore thread in the suspend list
std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
s_tls_thread_slot = g_fxo->get<cpu_counter>()->add(this, true);
}
verify(HERE), cpu_can_stop || !retval;
return retval;
}
if (!cpu_sleep_called && state0 & cpu_flag::suspend)
{
cpu_sleep();
cpu_sleep_called = true;
if (cpu_can_stop && s_tls_thread_slot != umax)
{
// Exclude inactive threads from the suspend list (optimization)
std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
g_fxo->get<cpu_counter>()->remove(this);
s_tls_thread_slot = -1;
}
continue;
}
if (state0 & (cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause))
{
thread_ctrl::wait();
}
else
{
if (state0 & cpu_flag::memory)
{
vm::passive_lock(*this);
continue;
}
// If only cpu_flag::pause was set, wait on suspend counter instead
if (state0 & cpu_flag::pause)
{
if (state0 & cpu_flag::wait)
{
// The snapshot above is only captured while cpu_flag::wait is clear; with wait already set it may be stale, so discard it
susp_ctr = -1;
}
// Hard way: no reliable counter snapshot, serialize on the suspend lock instead
if (susp_ctr == umax) [[unlikely]]
{
g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
continue;
}
// Wait for current suspend_all operation
for (u64 i = 0;; i++)
{
if (i < 20)
{
busy_wait(300);
}
else
{
g_suspend_counter.wait(susp_ctr);
}
if (!(state & cpu_flag::pause))
{
break;
}
}
susp_ctr = -1;
}
}
}
}
void cpu_thread::notify()
{
// Downcast to correct type
if (id_type() == 1)
{
thread_ctrl::notify(*static_cast<named_thread<ppu_thread>*>(this));
}
else if (id_type() == 2)
{
thread_ctrl::notify(*static_cast<named_thread<spu_thread>*>(this));
}
else
{
fmt::throw_exception("Invalid cpu_thread type" HERE);
}
}
void cpu_thread::abort()
{
// Downcast to correct type
if (id_type() == 1)
{
*static_cast<named_thread<ppu_thread>*>(this) = thread_state::aborting;
}
else if (id_type() == 2)
{
*static_cast<named_thread<spu_thread>*>(this) = thread_state::aborting;
}
else
{
fmt::throw_exception("Invalid cpu_thread type" HERE);
}
}
std::string cpu_thread::get_name() const
{
// Downcast to correct type
if (id_type() == 1)
{
return thread_ctrl::get_name(*static_cast<const named_thread<ppu_thread>*>(this));
}
else if (id_type() == 2)
{
return thread_ctrl::get_name(*static_cast<const named_thread<spu_thread>*>(this));
}
else
{
fmt::throw_exception("Invalid cpu_thread type" HERE);
}
}
std::string cpu_thread::dump_all() const
{
return {};
}
std::string cpu_thread::dump_regs() const
{
return {};
}
std::string cpu_thread::dump_callstack() const
{
return {};
}
std::vector<std::pair<u32, u32>> cpu_thread::dump_callstack_list() const
{
return {};
}
std::string cpu_thread::dump_misc() const
{
return fmt::format("Type: %s\n" "State: %s\n", typeid(*this).name(), state.load());
}
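// Queue this workload for execution with all other CPU threads paused.
// The first thread to push becomes the owner: it sets cpu_flag::pause on every registered thread,
// waits until each one acknowledges with cpu_flag::wait, runs all queued workloads in priority order,
// then releases the pause flags and bumps g_suspend_counter. Later pushers merely enqueue their work
// and wait (via check_state) for the owner to execute it.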
bool cpu_thread::suspend_work::push(cpu_thread* _this, bool cancel_if_not_suspended) noexcept
{
// Can't allow a pre-set wait bit: the suspend logic treats the wait flag as acknowledgement that a thread has stopped
verify(HERE), !_this || !(_this->state & cpu_flag::wait);
// cpu_counter object
const auto ctr = g_fxo->get<cpu_counter>();
// Try to push workload
auto& queue = ctr->cpu_suspend_work;
do
{
// Load current head
next = queue.load();
if (!next && cancel_if_not_suspended) [[unlikely]]
{
// Give up if not suspended
return false;
}
if (!_this && next)
{
// If _this == nullptr, it only works if this is the first workload pushed
ctr->cpu_suspend_lock.lock_unlock();
continue;
}
}
while (!queue.compare_and_swap_test(next, this));
if (!next)
{
// Monitor the performance only of the actual suspend processing owner
perf_meter<"SUSPEND"_u64> perf0;
// First thread to push the work to the workload list pauses all threads and processes it
std::lock_guard lock(ctr->cpu_suspend_lock);
// Try to prefetch cpu->state earlier
for_all_cpu([&](cpu_thread* cpu)
{
if (cpu != _this)
{
_m_prefetchw(&cpu->state);
}
});
// Copy of thread bits
decltype(ctr->cpu_copy_bits) copy2{};
for (u32 i = 0; i < ::size32(ctr->cpu_copy_bits); i++)
{
copy2[i] = ctr->cpu_copy_bits[i] = ctr->cpu_array_bits[i].load();
}
for_all_cpu([&](cpu_thread* cpu, u64 index)
{
if (cpu == _this || cpu->state.fetch_add(cpu_flag::pause) & cpu_flag::wait)
{
// Clear bits as long as wait flag is set
ctr->cpu_copy_bits[index / 64] &= ~(1ull << (index % 64));
}
if (cpu == _this)
{
copy2[index / 64] &= ~(1ull << (index % 64));
}
});
while (true)
{
// Check only CPUs which haven't acknowledged their waiting state yet
for_all_cpu<true>([&](cpu_thread* cpu, u64 index)
{
if (cpu->state & cpu_flag::wait)
{
ctr->cpu_copy_bits[index / 64] &= ~(1ull << (index % 64));
}
});
if (!std::accumulate(std::begin(ctr->cpu_copy_bits), std::end(ctr->cpu_copy_bits), u64{0}, std::bit_or()))
{
break;
}
_mm_pause();
}
// Extract queue and reverse element order (FILO to FIFO) (TODO: maybe leave order as is?)
auto* head = queue.exchange(nullptr);
s8 min_prio = head->prio;
s8 max_prio = head->prio;
if (auto* prev = head->next)
{
head->next = nullptr;
do
{
auto* pre2 = prev->next;
prev->next = head;
head = std::exchange(prev, pre2);
// Fill priority range
min_prio = std::min<s8>(min_prio, head->prio);
max_prio = std::max<s8>(max_prio, head->prio);
}
while (prev);
}
// Execute prefetch hint(s)
for (auto work = head; work; work = work->next)
{
for (u32 i = 0; i < work->prf_size; i++)
{
_m_prefetchw(work->prf_list[i]);
}
}
for_all_cpu<true>([&](cpu_thread* cpu)
{
_m_prefetchw(&cpu->state);
});
// Execute all stored workload
for (s32 prio = max_prio; prio >= min_prio; prio--)
{
// ... according to priorities
for (auto work = head; work; work = work->next)
{
// Properly sorting the singly-linked list would avoid rescanning it for every priority level
if (work->prio == prio)
{
work->exec(work->func_ptr, work->res_buf);
}
}
}
// Not sure if needed, may be overkill. Some workloads may execute instructions with non-temporal hint.
_mm_sfence();
// Finalization: advance the suspend counter so threads blocked on it in check_state() can proceed
g_suspend_counter++;
// Exact bitset for flag pause removal
std::memcpy(ctr->cpu_copy_bits, copy2, sizeof(copy2));
for_all_cpu<true>([&](cpu_thread* cpu)
{
cpu->state -= cpu_flag::pause;
});
}
else
{
// Seems safe to set pause on self because wait flag hasn't been observed yet
_this->state += cpu_flag::pause + cpu_flag::temp;
_this->check_state();
return true;
}
g_suspend_counter.notify_all();
return true;
}
void cpu_thread::stop_all() noexcept
{
if (g_tls_current_cpu_thread)
{
// Report unsupported but unnecessary case
sys_log.fatal("cpu_thread::stop_all() has been called from a CPU thread.");
return;
}
else
{
std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
auto on_stop = [](u32, cpu_thread& cpu)
{
cpu.state += cpu_flag::dbg_global_stop;
cpu.abort();
};
idm::select<named_thread<ppu_thread>>(on_stop);
idm::select<named_thread<spu_thread>>(on_stop);
}
sys_log.notice("All CPU threads have been signaled.");
while (g_fxo->get<cpu_counter>()->cpu_array_sema)
{
std::this_thread::sleep_for(10ms);
}
sys_log.notice("All CPU threads have been stopped. [+: %u]", +g_threads_created);
std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
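// Reset the creation counter while preserving the created-minus-deleted balance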
g_threads_deleted -= g_threads_created.load();
g_threads_created = 0;
}
void cpu_thread::flush_profilers() noexcept
{
if (!g_fxo->get<cpu_profiler>())
{
profiler.fatal("cpu_thread::flush_profilers() has been called incorrectly." HERE);
return;
}
if (g_cfg.core.spu_prof || false)
{
g_fxo->get<cpu_profiler>()->registered.push(0);
}
}