mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-03 21:41:26 +12:00
SPU: smart GETLLAR spin detection
This commit is contained in:
parent
feff2ba09e
commit
5693cc9eb3
3 changed files with 139 additions and 13 deletions
|
@ -4422,7 +4422,10 @@ bool spu_thread::process_mfc_cmd()
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Check if we can reuse our existing reservation
|
// Check if we can reuse our existing reservation
|
||||||
if (rtime == vm::reservation_acquire(addr) && cmp_rdata(rdata, data))
|
auto& res = vm::reservation_acquire(addr);
|
||||||
|
const u64 this_time = res;
|
||||||
|
|
||||||
|
if (this_time % 128 == 0 && cmp_rdata(rdata, data))
|
||||||
{
|
{
|
||||||
mov_rdata(_ref<spu_rdata_t>(ch_mfc_cmd.lsa & 0x3ff80), rdata);
|
mov_rdata(_ref<spu_rdata_t>(ch_mfc_cmd.lsa & 0x3ff80), rdata);
|
||||||
ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
|
ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
|
||||||
|
@ -4430,27 +4433,105 @@ bool spu_thread::process_mfc_cmd()
|
||||||
// Need to check twice for it to be accurate, the code is before and not after this check for:
|
// Need to check twice for it to be accurate, the code is before and not after this check for:
|
||||||
// 1. Reduce time between reservation accesses so TSX penalty would be lowered
|
// 1. Reduce time between reservation accesses so TSX penalty would be lowered
|
||||||
// 2. Increase the chance of change detection: if GETLLAR has been called again new data is probably wanted
|
// 2. Increase the chance of change detection: if GETLLAR has been called again new data is probably wanted
|
||||||
if (rtime == vm::reservation_acquire(addr) && cmp_rdata(rdata, data))
|
if (this_time == res && cmp_rdata(rdata, data))
|
||||||
{
|
{
|
||||||
|
if (this_time != rtime)
|
||||||
|
{
|
||||||
|
// Reservation was lost but the data itself remains unchanged so try to ignore it
|
||||||
|
set_events(SPU_EVENT_LR);
|
||||||
|
rtime = this_time;
|
||||||
|
}
|
||||||
|
|
||||||
if ([&]() -> bool
|
if ([&]() -> bool
|
||||||
{
|
{
|
||||||
// Validation that it is indeed GETLLAR spinning (large time window is intentional)
|
// Validation that it is indeed GETLLAR spinning (large time window is intentional)
|
||||||
if (last_getllar != pc || mfc_cmd_id - 1 != last_getllar_id || perf0.get() - last_gtsc >= 10'000)
|
if (last_getllar_addr != addr || last_getllar != pc || mfc_cmd_id - 1 != last_getllar_id || perf0.get() - last_gtsc >= 15'000)
|
||||||
{
|
{
|
||||||
// Seemingly not
|
// Seemingly not
|
||||||
getllar_busy_waiting_switch = umax;
|
getllar_busy_waiting_switch = umax;
|
||||||
|
getllar_spin_count = 0;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
getllar_spin_count++;
|
getllar_spin_count = std::min<u32>(getllar_spin_count + 1, u16{umax});
|
||||||
|
|
||||||
if (getllar_busy_waiting_switch == umax)
|
static atomic_t<usz> g_ok = 0, g_fail = 0;
|
||||||
|
|
||||||
|
if (getllar_busy_waiting_switch == umax && getllar_spin_count == 4)
|
||||||
{
|
{
|
||||||
// Evaluate its value (shift-right to ensure its randomness with different CPUs)
|
const u32 percent = g_cfg.core.spu_getllar_busy_waiting_percentage;
|
||||||
getllar_busy_waiting_switch = ((perf0.get() >> 8) % 100 < g_cfg.core.spu_getllar_busy_waiting_percentage) ? 1 : 0;
|
|
||||||
|
// Hidden value to force busy waiting (100 to 1 are dynamically adjusted, 0 is not)
|
||||||
|
if (percent != 101)
|
||||||
|
{
|
||||||
|
// Predict whether or not to use operating system sleep based on history
|
||||||
|
auto& stats = getllar_wait_time[pc / 32];
|
||||||
|
|
||||||
|
const auto old_stats = stats;
|
||||||
|
std::array<u8, 4> new_stats{};
|
||||||
|
|
||||||
|
// Rotate history (prepare newest entry)
|
||||||
|
new_stats[0] = 0;
|
||||||
|
new_stats[1] = old_stats[0];
|
||||||
|
new_stats[2] = old_stats[1];
|
||||||
|
new_stats[3] = old_stats[2];
|
||||||
|
|
||||||
|
stats = new_stats;
|
||||||
|
|
||||||
|
u32 total_wait = 0;
|
||||||
|
u32 zero_count = 0; // Try to ignore major inconsistencies
|
||||||
|
|
||||||
|
for (u8 val : old_stats)
|
||||||
|
{
|
||||||
|
total_wait += val;
|
||||||
|
zero_count += (val == 0 ? 1 : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return !!getllar_busy_waiting_switch || getllar_spin_count < 3;
|
// Add to chance if previous wait was long enough
|
||||||
|
const u32 add_count = zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40
|
||||||
|
: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
|
||||||
|
: zero_count == 1 && total_wait >= 8 ? (total_wait - 7) * 40
|
||||||
|
: zero_count == 0 && total_wait >= 6 ? (total_wait - 5) * 40
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
// Evaluate its value (shift-right to ensure its randomness with different CPUs)
|
||||||
|
getllar_busy_waiting_switch = ((perf0.get() >> 8) % 100 + add_count < percent) ? 1 : 0;
|
||||||
|
|
||||||
|
getllar_evaluate_time = perf0.get();
|
||||||
|
|
||||||
|
if (getllar_busy_waiting_switch)
|
||||||
|
{
|
||||||
|
g_fail++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
g_ok++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((g_ok + g_fail) % 20 == 0 && !getllar_busy_waiting_switch)
|
||||||
|
spu_log.trace("SPU wait: count=%d. switch=%d, spin=%d, fail=%d, ok=%d, {%d, %d, %d, %d}", total_wait, getllar_busy_waiting_switch, getllar_spin_count, +g_fail, +g_ok, old_stats[0], old_stats[1], old_stats[2], old_stats[3] );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
getllar_busy_waiting_switch = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Don't be stubborn, force operating system sleep if too much time has passed
|
||||||
|
else if (getllar_busy_waiting_switch == 1 && perf0.get() > getllar_evaluate_time && perf0.get() - getllar_evaluate_time >= 400'000)
|
||||||
|
{
|
||||||
|
const u32 percent = g_cfg.core.spu_getllar_busy_waiting_percentage;
|
||||||
|
|
||||||
|
// Hidden value to force busy waiting
|
||||||
|
if (percent != 101)
|
||||||
|
{
|
||||||
|
spu_log.trace("SPU wait for 0x%x", addr);
|
||||||
|
getllar_wait_time[pc / 32].front() = 1;
|
||||||
|
getllar_busy_waiting_switch = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Either 1 or umax
|
||||||
|
return getllar_busy_waiting_switch != 0;
|
||||||
}())
|
}())
|
||||||
{
|
{
|
||||||
if (g_cfg.core.mfc_debug)
|
if (g_cfg.core.mfc_debug)
|
||||||
|
@ -4468,6 +4549,8 @@ bool spu_thread::process_mfc_cmd()
|
||||||
|
|
||||||
if (getllar_busy_waiting_switch == 1)
|
if (getllar_busy_waiting_switch == 1)
|
||||||
{
|
{
|
||||||
|
getllar_wait_time[pc / 32].front() = 0;
|
||||||
|
|
||||||
#if defined(ARCH_X64)
|
#if defined(ARCH_X64)
|
||||||
if (utils::has_um_wait())
|
if (utils::has_um_wait())
|
||||||
{
|
{
|
||||||
|
@ -4501,13 +4584,21 @@ bool spu_thread::process_mfc_cmd()
|
||||||
|
|
||||||
// Spinning, might as well yield cpu resources
|
// Spinning, might as well yield cpu resources
|
||||||
state += cpu_flag::wait;
|
state += cpu_flag::wait;
|
||||||
vm::reservation_notifier(addr).wait(rtime, atomic_wait_timeout{50'000});
|
|
||||||
|
// Storage efficient method to distinguish different nearby addresses (which are likely)
|
||||||
|
g_reservation_waiters[std::popcount(addr)]++;
|
||||||
|
|
||||||
|
vm::reservation_notifier(addr).wait(this_time, atomic_wait_timeout{100'000});
|
||||||
|
|
||||||
|
g_reservation_waiters[std::popcount(addr)]--;
|
||||||
|
|
||||||
// Reset perf
|
// Reset perf
|
||||||
perf0.restart();
|
perf0.restart();
|
||||||
|
|
||||||
// Quick check if there were reservation changes
|
// Quick check if there were reservation changes
|
||||||
if (rtime == vm::reservation_acquire(addr) && cmp_rdata(rdata, data))
|
const u64 new_time = res;
|
||||||
|
|
||||||
|
if (new_time % 128 == 0 && cmp_rdata(rdata, data) && res == new_time && cmp_rdata(rdata, data))
|
||||||
{
|
{
|
||||||
if (g_cfg.core.mfc_debug)
|
if (g_cfg.core.mfc_debug)
|
||||||
{
|
{
|
||||||
|
@ -4518,12 +4609,40 @@ bool spu_thread::process_mfc_cmd()
|
||||||
std::memcpy(dump.data, rdata, 128);
|
std::memcpy(dump.data, rdata, 128);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Let the game recheck its state, maybe after a long period of time something else changed which satisfies its waiting condition
|
if (new_time != rtime)
|
||||||
getllar_spin_count = 1;
|
{
|
||||||
|
// Reservation was lost but the data itself remains unchanged so try to ignore it
|
||||||
|
set_events(SPU_EVENT_LR);
|
||||||
|
rtime = this_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
u8& val = getllar_wait_time[pc / 32].front();
|
||||||
|
val = static_cast<u8>(std::min<u32>(val + 1, u8{umax}));
|
||||||
|
|
||||||
last_getllar_id = mfc_cmd_id;
|
last_getllar_id = mfc_cmd_id;
|
||||||
last_gtsc = perf0.get();
|
last_gtsc = perf0.get();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (new_time == this_time && res == this_time)
|
||||||
|
{
|
||||||
|
spu_log.trace("RTIME unchanged on address 0x%x", addr);
|
||||||
|
|
||||||
|
// Try to forcefully change in order to notify threads
|
||||||
|
if (g_reservation_waiters[std::popcount(addr)] && res.compare_and_swap_test(this_time, this_time + 128))
|
||||||
|
{
|
||||||
|
vm::reservation_notifier(addr).notify_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this_time == rtime)
|
||||||
|
{
|
||||||
|
// Try to forcefully change in order to notify threads
|
||||||
|
if (g_reservation_waiters[std::popcount(addr)] && res.compare_and_swap_test(this_time, this_time + 128))
|
||||||
|
{
|
||||||
|
vm::reservation_notifier(addr).notify_all();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4535,6 +4654,7 @@ bool spu_thread::process_mfc_cmd()
|
||||||
|
|
||||||
last_getllar_id = mfc_cmd_id;
|
last_getllar_id = mfc_cmd_id;
|
||||||
last_getllar = pc;
|
last_getllar = pc;
|
||||||
|
last_getllar_addr = addr;
|
||||||
last_gtsc = perf0.get();
|
last_gtsc = perf0.get();
|
||||||
getllar_spin_count = 0;
|
getllar_spin_count = 0;
|
||||||
getllar_busy_waiting_switch = umax;
|
getllar_busy_waiting_switch = umax;
|
||||||
|
@ -6855,3 +6975,4 @@ void fmt_class_string<spu_channel_4_t>::format(std::string& out, u64 arg)
|
||||||
DECLARE(spu_thread::g_raw_spu_ctr){};
|
DECLARE(spu_thread::g_raw_spu_ctr){};
|
||||||
DECLARE(spu_thread::g_raw_spu_id){};
|
DECLARE(spu_thread::g_raw_spu_id){};
|
||||||
DECLARE(spu_thread::g_spu_work_count){};
|
DECLARE(spu_thread::g_spu_work_count){};
|
||||||
|
DECLARE(spu_thread::g_reservation_waiters){};
|
||||||
|
|
|
@ -798,8 +798,10 @@ public:
|
||||||
u64 last_gtsc = 0;
|
u64 last_gtsc = 0;
|
||||||
u32 last_getllar = umax; // LS address of last GETLLAR (if matches current GETLLAR we can let the thread rest)
|
u32 last_getllar = umax; // LS address of last GETLLAR (if matches current GETLLAR we can let the thread rest)
|
||||||
u32 last_getllar_id = umax;
|
u32 last_getllar_id = umax;
|
||||||
|
u32 last_getllar_addr = umax;
|
||||||
u32 getllar_spin_count = 0;
|
u32 getllar_spin_count = 0;
|
||||||
u32 getllar_busy_waiting_switch = umax; // umax means the test needs evaluation, otherwise it's a boolean
|
u32 getllar_busy_waiting_switch = umax; // umax means the test needs evaluation, otherwise it's a boolean
|
||||||
|
u64 getllar_evaluate_time = 0;
|
||||||
|
|
||||||
std::vector<mfc_cmd_dump> mfc_history;
|
std::vector<mfc_cmd_dump> mfc_history;
|
||||||
u64 mfc_dump_idx = 0;
|
u64 mfc_dump_idx = 0;
|
||||||
|
@ -823,6 +825,8 @@ public:
|
||||||
u32 current_bp_pc = umax;
|
u32 current_bp_pc = umax;
|
||||||
bool stop_flag_removal_protection = false;
|
bool stop_flag_removal_protection = false;
|
||||||
|
|
||||||
|
std::array<std::array<u8, 4>, SPU_LS_SIZE / 32> getllar_wait_time{};
|
||||||
|
|
||||||
void push_snr(u32 number, u32 value);
|
void push_snr(u32 number, u32 value);
|
||||||
static void do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls);
|
static void do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls);
|
||||||
bool do_dma_check(const spu_mfc_cmd& args);
|
bool do_dma_check(const spu_mfc_cmd& args);
|
||||||
|
@ -892,6 +896,7 @@ public:
|
||||||
static atomic_t<u32> g_raw_spu_ctr;
|
static atomic_t<u32> g_raw_spu_ctr;
|
||||||
static atomic_t<u32> g_raw_spu_id[5];
|
static atomic_t<u32> g_raw_spu_id[5];
|
||||||
static atomic_t<u32> g_spu_work_count;
|
static atomic_t<u32> g_spu_work_count;
|
||||||
|
static atomic_t<u8> g_reservation_waiters[32];
|
||||||
|
|
||||||
static u32 find_raw_spu(u32 id)
|
static u32 find_raw_spu(u32 id)
|
||||||
{
|
{
|
||||||
|
|
|
@ -33,7 +33,7 @@ struct cfg_root : cfg::node
|
||||||
cfg::_bool set_daz_and_ftz{ this, "Set DAZ and FTZ", false };
|
cfg::_bool set_daz_and_ftz{ this, "Set DAZ and FTZ", false };
|
||||||
cfg::_enum<spu_decoder_type> spu_decoder{ this, "SPU Decoder", spu_decoder_type::llvm };
|
cfg::_enum<spu_decoder_type> spu_decoder{ this, "SPU Decoder", spu_decoder_type::llvm };
|
||||||
cfg::uint<0, 100> spu_reservation_busy_waiting_percentage{ this, "SPU Reservation Busy Waiting Percentage", 0, true };
|
cfg::uint<0, 100> spu_reservation_busy_waiting_percentage{ this, "SPU Reservation Busy Waiting Percentage", 0, true };
|
||||||
cfg::uint<0, 100> spu_getllar_busy_waiting_percentage{ this, "SPU GETLLAR Busy Waiting Percentage", 100, true };
|
cfg::uint<0, 101> spu_getllar_busy_waiting_percentage{ this, "SPU GETLLAR Busy Waiting Percentage", 100, true };
|
||||||
cfg::_bool spu_debug{ this, "SPU Debug" };
|
cfg::_bool spu_debug{ this, "SPU Debug" };
|
||||||
cfg::_bool mfc_debug{ this, "MFC Debug" };
|
cfg::_bool mfc_debug{ this, "MFC Debug" };
|
||||||
cfg::_int<0, 6> preferred_spu_threads{ this, "Preferred SPU Threads", 0, true }; // Number of hardware threads dedicated to heavy simultaneous spu tasks
|
cfg::_int<0, 6> preferred_spu_threads{ this, "Preferred SPU Threads", 0, true }; // Number of hardware threads dedicated to heavy simultaneous spu tasks
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue