mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-04 05:51:27 +12:00
CPU: use u128 in bit counter
Apparently, u64 was not enough.
This commit is contained in:
parent
d4d5dc99f3
commit
292af1e4cd
1 changed files with 23 additions and 14 deletions
|
@ -10,6 +10,7 @@
|
||||||
#include "Emu/Cell/SPUThread.h"
|
#include "Emu/Cell/SPUThread.h"
|
||||||
#include "Emu/perf_meter.hpp"
|
#include "Emu/perf_meter.hpp"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
@ -254,7 +255,7 @@ thread_local cpu_thread* g_tls_current_cpu_thread = nullptr;
|
||||||
static atomic_t<u64, 64> s_cpu_counter{0};
|
static atomic_t<u64, 64> s_cpu_counter{0};
|
||||||
|
|
||||||
// List of posted tasks for suspend_all
|
// List of posted tasks for suspend_all
|
||||||
static atomic_t<cpu_thread::suspend_work*> s_cpu_work[64]{};
|
static atomic_t<cpu_thread::suspend_work*> s_cpu_work[128]{};
|
||||||
|
|
||||||
// Linked list of pushed tasks for suspend_all
|
// Linked list of pushed tasks for suspend_all
|
||||||
static atomic_t<cpu_thread::suspend_work*> s_pushed{};
|
static atomic_t<cpu_thread::suspend_work*> s_pushed{};
|
||||||
|
@ -263,10 +264,10 @@ static atomic_t<cpu_thread::suspend_work*> s_pushed{};
|
||||||
static shared_mutex s_cpu_lock;
|
static shared_mutex s_cpu_lock;
|
||||||
|
|
||||||
// Bit allocator for threads which need to be suspended
|
// Bit allocator for threads which need to be suspended
|
||||||
static atomic_t<u64> s_cpu_bits{};
|
static atomic_t<u128> s_cpu_bits{};
|
||||||
|
|
||||||
// List of active threads which need to be suspended
|
// List of active threads which need to be suspended
|
||||||
static atomic_t<cpu_thread*> s_cpu_list[64]{};
|
static atomic_t<cpu_thread*> s_cpu_list[128]{};
|
||||||
|
|
||||||
namespace cpu_counter
|
namespace cpu_counter
|
||||||
{
|
{
|
||||||
|
@ -278,7 +279,7 @@ namespace cpu_counter
|
||||||
|
|
||||||
for (u64 i = 0;; i++)
|
for (u64 i = 0;; i++)
|
||||||
{
|
{
|
||||||
const auto [bits, ok] = s_cpu_bits.fetch_op([](u64& bits) -> u64
|
const auto [bits, ok] = s_cpu_bits.fetch_op([](u128& bits)
|
||||||
{
|
{
|
||||||
if (~bits) [[likely]]
|
if (~bits) [[likely]]
|
||||||
{
|
{
|
||||||
|
@ -293,7 +294,7 @@ namespace cpu_counter
|
||||||
if (ok) [[likely]]
|
if (ok) [[likely]]
|
||||||
{
|
{
|
||||||
// Get actual slot number
|
// Get actual slot number
|
||||||
id = std::countr_one(bits);
|
id = utils::ctz128(~bits);
|
||||||
|
|
||||||
// Register thread
|
// Register thread
|
||||||
if (s_cpu_list[id].compare_and_swap_test(nullptr, _this)) [[likely]]
|
if (s_cpu_list[id].compare_and_swap_test(nullptr, _this)) [[likely]]
|
||||||
|
@ -318,6 +319,14 @@ namespace cpu_counter
|
||||||
s_tls_thread_slot = id;
|
s_tls_thread_slot = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void remove_cpu_bit(u32 bit)
|
||||||
|
{
|
||||||
|
s_cpu_bits.atomic_op([=](u128& val)
|
||||||
|
{
|
||||||
|
val &= ~(u128{1} << (bit % 128));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
void remove(cpu_thread* _this) noexcept
|
void remove(cpu_thread* _this) noexcept
|
||||||
{
|
{
|
||||||
// Unregister and wait if necessary
|
// Unregister and wait if necessary
|
||||||
|
@ -339,31 +348,31 @@ namespace cpu_counter
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
s_cpu_bits &= ~(1ull << (slot % 64));
|
remove_cpu_bit(slot);
|
||||||
|
|
||||||
s_tls_thread_slot = -1;
|
s_tls_thread_slot = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
u64 for_all_cpu(/*mutable*/ u64 copy, F func) noexcept
|
u128 for_all_cpu(/*mutable*/ u128 copy, F func) noexcept
|
||||||
{
|
{
|
||||||
for (u64 bits = copy; bits; bits &= bits - 1)
|
for (u128 bits = copy; bits; bits &= bits - 1)
|
||||||
{
|
{
|
||||||
const u32 index = std::countr_zero(bits);
|
const u32 index = utils::ctz128(bits);
|
||||||
|
|
||||||
if (cpu_thread* cpu = s_cpu_list[index].load())
|
if (cpu_thread* cpu = s_cpu_list[index].load())
|
||||||
{
|
{
|
||||||
if constexpr (std::is_invocable_v<F, cpu_thread*, u32>)
|
if constexpr (std::is_invocable_v<F, cpu_thread*, u32>)
|
||||||
{
|
{
|
||||||
if (!func(cpu, index))
|
if (!func(cpu, index))
|
||||||
copy &= ~(1ull << index);
|
copy &= ~(u128{1} << index);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr (std::is_invocable_v<F, cpu_thread*>)
|
if constexpr (std::is_invocable_v<F, cpu_thread*>)
|
||||||
{
|
{
|
||||||
if (!func(cpu))
|
if (!func(cpu))
|
||||||
copy &= ~(1ull << index);
|
copy &= ~(u128{1} << index);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -371,7 +380,7 @@ namespace cpu_counter
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
copy &= ~(1ull << index);
|
copy &= ~(u128{1} << index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -847,7 +856,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
|
||||||
// First thread to push the work to the workload list pauses all threads and processes it
|
// First thread to push the work to the workload list pauses all threads and processes it
|
||||||
std::lock_guard lock(s_cpu_lock);
|
std::lock_guard lock(s_cpu_lock);
|
||||||
|
|
||||||
u64 copy = s_cpu_bits.load();
|
u128 copy = s_cpu_bits.load();
|
||||||
|
|
||||||
// Try to prefetch cpu->state earlier
|
// Try to prefetch cpu->state earlier
|
||||||
copy = cpu_counter::for_all_cpu(copy, [&](cpu_thread* cpu)
|
copy = cpu_counter::for_all_cpu(copy, [&](cpu_thread* cpu)
|
||||||
|
@ -865,7 +874,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
|
||||||
g_suspend_counter += 2;
|
g_suspend_counter += 2;
|
||||||
|
|
||||||
// Copy snapshot for finalization
|
// Copy snapshot for finalization
|
||||||
u64 copy2 = copy;
|
u128 copy2 = copy;
|
||||||
|
|
||||||
copy = cpu_counter::for_all_cpu(copy, [&](cpu_thread* cpu, u32 index)
|
copy = cpu_counter::for_all_cpu(copy, [&](cpu_thread* cpu, u32 index)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue