From 12f4e4fd8c27f61015769a16e369c1ba3df793ef Mon Sep 17 00:00:00 2001 From: elad335 <18193363+elad335@users.noreply.github.com> Date: Tue, 6 May 2025 18:18:45 +0300 Subject: [PATCH 1/2] SPU: Evaluated Thread-blocked LR messaging --- rpcs3/Emu/Cell/SPUThread.cpp | 273 ++++++++++++++++++++++++------ rpcs3/Emu/Cell/SPUThread.h | 9 + rpcs3/Emu/system_config.h | 3 +- rpcs3/rpcs3qt/emu_settings_type.h | 2 + rpcs3/rpcs3qt/settings_dialog.cpp | 3 + rpcs3/rpcs3qt/settings_dialog.ui | 7 + rpcs3/rpcs3qt/tooltips.h | 1 + 7 files changed, 247 insertions(+), 51 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 0396068018..8f812b5a4c 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -4549,6 +4549,126 @@ u32 spu_thread::get_mfc_completed() const return ch_tag_mask & ~mfc_fence; } +u32 evaluate_spin_optimization(std::span stats, u64 evaluate_time, const cfg::uint<0, 100>& wait_percent, bool inclined_for_responsiveness = false) +{ + ensure(stats.size() >= 2 && stats.size() <= 16); + + const u32 percent = wait_percent; + + // Predict whether or not to use operating system sleep based on history + + std::array old_stats{}; + std::copy_n(stats.data(), stats.size(), old_stats.data()); + + // Rotate history (prepare newest entry) + stats[0] = 0; + std::copy_n(old_stats.data(), stats.size() - 1, stats.data() + 1); + + u32 total_wait = 0; + u32 zero_count = 0; // Try to ignore major inconsistencies + u32 consecutive_zero = 0; + u32 consecutive_zero_or_one = 0; + u32 consecutive_zero_or_one_tally = 0; + + usz index = umax; + + for (u8 val : old_stats) + { + index++; + + if (index == stats.size()) + { + break; + } + + total_wait += val; + + if (val == 0) + { + if (consecutive_zero == index) + { + consecutive_zero++; + consecutive_zero_or_one++; + //consecutive_zero_or_one_tally += 0; + } + + ++zero_count; + } + + if (val == 1) + { + if (consecutive_zero_or_one == index) + { + consecutive_zero_or_one++; + consecutive_zero_or_one_tally++; + } + } + } + + if (inclined_for_responsiveness) + { + total_wait /= 2; + } + + // Add to chance if previous wait was long enough + u32 add_count = 0; + + if (stats.size() == 4) + { + add_count = zero_count == 3 && total_wait >= 9 ? (total_wait - 8) * 40 + : zero_count == 2 && total_wait >= 8 ? (total_wait - 7) * 40 + : zero_count == 1 && total_wait >= 7 ? (total_wait - 6) * 40 + : zero_count == 0 && total_wait >= 4 ? (total_wait - 3) * 40 + : 0; + } + else + { + add_count = zero_count >= 12 && total_wait >= 80 ? (total_wait - 80) * 30 + : zero_count >= 7 && total_wait >= 30 ? (total_wait - 30) * 10 + : zero_count >= 4 && total_wait >= 20 ? (total_wait - 20) * 10 + : zero_count >= 0 && total_wait >= 10 ? (total_wait - 10) * 10 + : 0; + } + + if (stats.size() == 16 && (consecutive_zero >= 2 || (consecutive_zero_or_one >= 3 && consecutive_zero_or_one_tally < consecutive_zero_or_one * 2 / 3))) + { + // Thread is back to action after some sleep + add_count = 0; + } + + if (inclined_for_responsiveness && std::count(old_stats.data(), old_stats.data() + 3, 0) >= 2) + { + add_count = 0; + } + + // Evalute its value (shift-right to ensure its randomness with different CPUs) + const u32 busy_waiting_switch = ((evaluate_time >> 8) % 100 + add_count < percent) ? 1 : 0; + + thread_local usz g_system_wait = 0, g_busy_wait = 0; + + if (busy_waiting_switch) + { + g_busy_wait++; + } + else + { + g_system_wait++; + } + + if ((g_system_wait + g_busy_wait) && (g_system_wait + g_busy_wait) % 200 == 0) + { + spu_log.trace("SPU wait: count=%d. switch=%d, spin=%d, busy=%d, system=%d, {%d, %d, %d, %d}", total_wait, busy_waiting_switch, !"TODO: Spin", +g_busy_wait, +g_system_wait, old_stats[0], old_stats[1], old_stats[2], old_stats[3]); + } + + if ((g_system_wait + g_busy_wait) % 5000 == 0) + { + g_system_wait = 0; + g_busy_wait = 0; + } + + return busy_waiting_switch; +} + bool spu_thread::process_mfc_cmd() { // Stall infinitely if MFC queue is full @@ -4663,61 +4783,16 @@ bool spu_thread::process_mfc_cmd() getllar_spin_count = std::min(getllar_spin_count + 1, u16{umax}); - static atomic_t g_ok = 0, g_fail = 0; - if (getllar_busy_waiting_switch == umax && getllar_spin_count == 4) { // Hidden value to force busy waiting (100 to 1 are dynamically adjusted, 0 is not) if (!g_cfg.core.spu_getllar_spin_optimization_disabled) { - const u32 percent = g_cfg.core.spu_getllar_busy_waiting_percentage; - - // Predict whether or not to use operating system sleep based on history - auto& stats = getllar_wait_time[(addr % SPU_LS_SIZE) / 128]; - - const std::array old_stats = stats; - std::array new_stats{}; - - // Rotate history (prepare newest entry) - new_stats[0] = 0; - new_stats[1] = old_stats[0]; - new_stats[2] = old_stats[1]; - new_stats[3] = old_stats[2]; - - stats = new_stats; - - u32 total_wait = 0; - u32 zero_count = 0; // Try to ignore major inconsistencies - - for (u8 val : old_stats) - { - total_wait += val; - if (val == 0) ++zero_count; - } - - // Add to chance if previous wait was long enough - const u32 add_count = zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40 - : zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40 - : zero_count == 1 && total_wait >= 8 ? (total_wait - 7) * 40 - : zero_count == 0 && total_wait >= 6 ? (total_wait - 5) * 40 - : 0; - - // Evalute its value (shift-right to ensure its randomness with different CPUs) - getllar_busy_waiting_switch = ((perf0.get() >> 8) % 100 + add_count < percent) ? 1 : 0; - getllar_evaluate_time = perf0.get(); + auto& history = getllar_wait_time[(addr % SPU_LS_SIZE) / 128]; - if (getllar_busy_waiting_switch) - { - g_fail++; - } - else - { - g_ok++; - } - - if ((g_ok + g_fail) % 200 == 0 && !getllar_busy_waiting_switch) - spu_log.trace("SPU wait: count=%d. switch=%d, spin=%d, fail=%d, ok=%d, {%d, %d, %d, %d}", total_wait, getllar_busy_waiting_switch, getllar_spin_count, +g_fail, +g_ok, old_stats[0], old_stats[1], old_stats[2], old_stats[3] ); + getllar_busy_waiting_switch = + evaluate_spin_optimization({ history.data(), history.size() }, getllar_evaluate_time, g_cfg.core.spu_getllar_busy_waiting_percentage); } else { @@ -5968,7 +6043,52 @@ s64 spu_thread::get_ch_value(u32 ch) return true; }; - for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true)) + const bool is_LR_wait = raddr && mask1 & SPU_EVENT_LR; + + auto& history = eventstat_wait_time[(raddr % SPU_LS_SIZE) / 128]; + + if (is_LR_wait) + { + const u32 spu_group_restart = group ? +group->stop_count : 0; + + // Check if waiting session changed + if (eventstat_raddr != raddr || eventstat_block_counter != block_counter || last_getllar != eventstat_getllar || eventstat_spu_group_restart != spu_group_restart) + { + eventstat_raddr = raddr; + eventstat_block_counter = block_counter; + eventstat_getllar = last_getllar; + eventstat_spu_group_restart = spu_group_restart; + eventstat_spin_count = 0; + eventstat_evaluate_time = get_system_time(); + eventstat_busy_waiting_switch = umax; + } + else + { + u8& val = history.front(); + val = static_cast(std::min(val + 1, u8{umax})); + } + } + else + { + eventstat_busy_waiting_switch = 0; + eventstat_raddr = 0; + eventstat_block_counter = 0; + } + + if (eventstat_busy_waiting_switch == umax) + { + bool value = false; + + if (is_LR_wait && g_cfg.core.spu_reservation_busy_waiting_enabled) + { + // Make single-threaded groups inclined for busy-waiting + value = evaluate_spin_optimization({ history.data(), history.size() }, eventstat_evaluate_time, g_cfg.core.spu_reservation_busy_waiting_percentage, group && group->max_num == 1) != 0; + } + + eventstat_busy_waiting_switch = value ? 1 : 0; + } + + for (bool is_first = true; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true), is_first = false) { const auto old = +state; @@ -5983,7 +6103,7 @@ s64 spu_thread::get_ch_value(u32 ch) } // Optimized check - if (raddr && mask1 & SPU_EVENT_LR) + if (is_LR_wait) { if (cache_line_waiter_index == umax) { @@ -6014,6 +6134,59 @@ s64 spu_thread::get_ch_value(u32 ch) set_events(SPU_EVENT_LR); continue; } + + if (!is_first && eventstat_busy_waiting_switch != 1) + { + u8& val = history.front(); + val = static_cast(std::min(val + 1, u8{umax})); + } + } + + if (eventstat_busy_waiting_switch == 1) + { + // Don't be stubborn, force operating sleep if too much time has passed + const u64 time_since = get_system_time() - eventstat_evaluate_time; + + if (time_since >= (utils::get_thread_count() >= 9 ? 50'000 : 3000)) + { + spu_log.trace("SPU RdEventStat wait for 0x%x failed", raddr); + history.front() = 2; + eventstat_busy_waiting_switch = 0; + continue; + } + +#if defined(ARCH_X64) + if (utils::has_um_wait()) + { + if (utils::has_waitpkg()) + { + __tpause(std::min(eventstat_spin_count, 10) * 500, 0x1); + } + else + { + struct check_wait_t + { + static FORCE_INLINE bool needs_wait(u64 rtime, const atomic_t& mem_rtime) noexcept + { + return rtime == mem_rtime; + } + }; + + // Provide the first X64 cache line of the reservation to be tracked + __mwaitx(std::min(eventstat_spin_count, 17) * 500, 0xf0, std::addressof(*resrv_mem), +rtime, vm::reservation_acquire(raddr)); + } + } + else +#endif + { + busy_wait(300); + } + + // Check other reservations in other threads + lv2_obj::notify_all(); + + eventstat_spin_count++; + continue; } if (raddr && (mask1 & ~SPU_EVENT_TM) == SPU_EVENT_LR) diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index e0cc9ca0e7..ce84e9e1b1 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -806,6 +806,14 @@ public: u32 getllar_busy_waiting_switch = umax; // umax means the test needs evaluation, otherwise it's a boolean u64 getllar_evaluate_time = 0; + u32 eventstat_raddr = 0; + u32 eventstat_getllar = 0; + u64 eventstat_block_counter = 0; + u64 eventstat_spu_group_restart = 0; + u64 eventstat_spin_count = 0; + u64 eventstat_evaluate_time = 0; + u32 eventstat_busy_waiting_switch = 0; + std::vector mfc_history; u64 mfc_dump_idx = 0; static constexpr u32 max_mfc_dump_idx = 4096; @@ -829,6 +837,7 @@ public: bool stop_flag_removal_protection = false; std::array, SPU_LS_SIZE / 128> getllar_wait_time{}; + std::array, SPU_LS_SIZE / 128> eventstat_wait_time{}; void push_snr(u32 number, u32 value); static void do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls); diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 7035fff5d0..92031a51db 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -31,7 +31,8 @@ struct cfg_root : cfg::node cfg::_enum thread_scheduler{this, "Thread Scheduler Mode", thread_scheduler_mode::os}; cfg::_bool set_daz_and_ftz{ this, "Set DAZ and FTZ", false }; cfg::_enum spu_decoder{ this, "SPU Decoder", spu_decoder_type::llvm }; - cfg::uint<0, 100> spu_reservation_busy_waiting_percentage{ this, "SPU Reservation Busy Waiting Percentage", 0, true }; + cfg::uint<0, 100> spu_reservation_busy_waiting_percentage{ this, "SPU Reservation Busy Waiting Percentage 1", 100, true }; + cfg::_bool spu_reservation_busy_waiting_enabled{ this, "SPU Reservation Busy Waiting Enabled", false, true }; cfg::uint<0, 100> spu_getllar_busy_waiting_percentage{ this, "SPU GETLLAR Busy Waiting Percentage", 100, true }; cfg::_bool spu_getllar_spin_optimization_disabled{ this, "Disable SPU GETLLAR Spin Optimization", false, true }; cfg::_bool spu_debug{ this, "SPU Debug" }; diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index 845e21a8d8..0fda096705 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -52,6 +52,7 @@ enum class emu_settings_type MaxPreemptCount, SPUProfiler, DisableSpinOptimization, + EnabledSPUEventsBusyLoop, // Graphics Renderer, @@ -250,6 +251,7 @@ inline static const std::map settings_location { emu_settings_type::MaxPreemptCount, { "Core", "Max CPU Preempt Count"}}, { emu_settings_type::SPUProfiler, { "Core", "SPU Profiler"}}, { emu_settings_type::DisableSpinOptimization, { "Core", "Disable SPU GETLLAR Spin Optimization"}}, + { emu_settings_type::EnabledSPUEventsBusyLoop, { "Core", "SPU Reservation Busy Waiting Enabled"}}, // Graphics Tab { emu_settings_type::Renderer, { "Video", "Renderer"}}, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 10fa33c31d..abea4b47bf 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -1602,6 +1602,9 @@ settings_dialog::settings_dialog(std::shared_ptr gui_settings, std m_emu_settings->EnhanceCheckBox(ui->disableSpinOptimization, emu_settings_type::DisableSpinOptimization); SubscribeTooltip(ui->disableSpinOptimization, tooltips.settings.disable_spin_optimization); + m_emu_settings->EnhanceCheckBox(ui->enableSpuEventsBusyLoop, emu_settings_type::EnabledSPUEventsBusyLoop); + SubscribeTooltip(ui->enableSpuEventsBusyLoop, tooltips.settings.enable_spu_events_busy_loop); + // Comboboxes m_emu_settings->EnhanceComboBox(ui->maxSPURSThreads, emu_settings_type::MaxSPURSThreads, true); diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index 08a453d1f6..7a3d4951a8 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -2429,6 +2429,13 @@ + + + + Enable SPU Events Busy Loop + + + diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h index 9ac64c495a..41cfc1e6d8 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -42,6 +42,7 @@ public: const QString disable_vertex_cache = tr("Disables the vertex cache.\nMight resolve missing or flickering graphics output.\nMay degrade performance."); const QString disable_async_host_mm = tr("Force host memory management calls to be inlined instead of handled asynchronously.\nThis can cause severe performance degradation and stuttering in some games.\nThis option is only needed by developers to debug problems with texture cache memory protection."); const QString disable_spin_optimization = tr("Disable SPU GETLLAR spin optimization.\nThis can cause severe performance degradation and stuttering in many games.\nThis option is only needed for a select number of games."); + const QString enable_spu_events_busy_loop = tr("Enable SPU RdEventStat spin.\nThis increases CPU usage, this setting is beneficial for high-threaded CPUs (12+) with select number of games."); const QString zcull_operation_mode = tr("Changes ZCULL report synchronization behaviour. Experiment to find the best option for your game. Approximate mode is recommended for most games.\n· Precise is the most accurate to PS3 behaviour. Required for accurate visuals in some titles such as Demon's Souls and The Darkness.\n· Approximate is a much faster way to generate occlusion data which may not always match what the PS3 would generate. Works well with most PS3 games.\n· Relaxed changes the synchronization method completely and can greatly improve performance in some games or completely break others."); const QString max_spurs_threads = tr("Limits the maximum number of SPURS threads in each thread group.\nMay improve performance in some cases, especially on systems with limited number of hardware threads.\nLimiting the number of threads is likely to cause crashes; it's recommended to keep this at the default value."); const QString sleep_timers_accuracy = tr("Changes the sleep period accuracy.\n'As Host' uses default accuracy of the underlying operating system, while 'All Timers' attempts to improve it.\n'Usleep Only' limits the adjustments to usleep syscall only.\nCan affect performance in unexpected ways."); From 21cdbd90e9a171987c9b7efef0ae1f71a77caae8 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Thu, 8 May 2025 14:54:54 +0200 Subject: [PATCH 2/2] trophies/overlays: treat unlocked hidden trophies just like normal unlocked trophies --- .../Overlays/Trophies/overlay_trophy_list_dialog.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/Overlays/Trophies/overlay_trophy_list_dialog.cpp b/rpcs3/Emu/RSX/Overlays/Trophies/overlay_trophy_list_dialog.cpp index bd8acd8446..13db1dff1e 100644 --- a/rpcs3/Emu/RSX/Overlays/Trophies/overlay_trophy_list_dialog.cpp +++ b/rpcs3/Emu/RSX/Overlays/Trophies/overlay_trophy_list_dialog.cpp @@ -31,7 +31,7 @@ namespace rsx if (fs::exists(icon_path)) { - icon_data = std::make_unique(icon_path, details.hidden || locked); + icon_data = std::make_unique(icon_path, locked); static_cast(image.get())->set_raw_image(icon_data.get()); } else @@ -53,7 +53,7 @@ namespace rsx std::unique_ptr text_stack = std::make_unique(); std::unique_ptr padding = std::make_unique(); - std::unique_ptr header_text = std::make_unique