Implement independent CPU preemptions

2025-07-11 17:28:36 +12:00 · 2022-09-06 18:59:23 +03:00 · 2022-09-06 18:59:23 +03:00 · ec7b18dab5
commit ec7b18dab5
parent b9e20dc5c9
12 changed files with 358 additions and 4 deletions
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@ -2,6 +2,7 @@
 #include "RSXThread.h"

 #include "Emu/Cell/PPUCallback.h"
+#include "Emu/Cell/SPUThread.h"
 #include "Emu/Cell/timers.hpp"

 #include "Common/BufferUtils.h"
@ -42,6 +43,8 @@ rsx::frame_capture_data frame_capture;
 extern CellGcmOffsetTable offsetTable;
 extern thread_local std::string(*g_tls_log_prefix)();

+LOG_CHANNEL(perf_log, "PERF");
+
 template <>
 bool serialize<rsx::rsx_state>(utils::serial& ar, rsx::rsx_state& o)
 {
@ -241,6 +244,17 @@ namespace rsx
 		fmt::throw_exception("rsx::get_address(offset=0x%x, location=0x%x): %s%s", offset, location, msg, src_loc{line, col, file, func});
 	}

+	extern void set_rsx_yield_flag() noexcept // Adds cpu_flag::yield to the current renderer's state, provided a renderer is returned and allow_rsx_cpu_preempt is set
+	{
+		auto renderer = get_current_renderer();
+		if (!renderer || !g_cfg.core.allow_rsx_cpu_preempt)
+		{
+			return; // Nothing to flag: no renderer handle, or the core setting is off
+		}
+
+		renderer->state += cpu_flag::yield;
+	}
+
 	std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count) const
 	{
 		if (single_vertex)
@ -3373,7 +3387,7 @@ namespace rsx

 		switch (frame_limit)
 		{
-		case frame_limit_type::none: limit = 0.; break;
+		case frame_limit_type::none: limit = g_cfg.core.max_cpu_preempt_count_per_frame ? static_cast<double>(g_cfg.video.vblank_rate) : 0.; break;
 		case frame_limit_type::_50: limit = 50.; break;
 		case frame_limit_type::_60: limit = 60.; break;
 		case frame_limit_type::_30: limit = 30.; break;
@ -3478,5 +3492,183 @@ namespace rsx
 				intr_thread->cmd_notify.notify_one();
 			}
 		}
+
+		evaluate_cpu_usage_reduction_limits();
+	}
+
+	void thread::evaluate_cpu_usage_reduction_limits() // Re-tunes the preemption count handed to lv2_obj::set_yield_frequency from the recorded frame history, then records a new sample
+	{
+		const u64 max_preempt_count = g_cfg.core.max_cpu_preempt_count_per_frame;
+
+		if (!max_preempt_count) // A configured maximum of zero drops the history and resets the yield frequency
+		{
+			frame_times.clear();
+			lv2_obj::set_yield_frequency(0, 0);
+			return;
+		}
+
+		const u64 current_time = get_system_time();
+		const u64 current_tsc = utils::get_tsc();
+		u64 preempt_count = 0; // Stays 0 (and is recorded as 0) until 60 samples have been collected
+
+		if (frame_times.size() >= 60)
+		{
+			u64 diffs = 0; // Sum of the timestamp deltas between neighbouring samples
+
+			for (usz i = 1; i < frame_times.size(); i++)
+			{
+				const u64 cur_diff = frame_times[i].timestamp - frame_times[i - 1].timestamp;
+				diffs += cur_diff;
+			}
+
+			const usz avg_frame_time = diffs / 59; // 59 intervals between 60 samples; this function keeps the history at 60 entries via the pop_front/push_back pair below
+
+			u32 lowered_delay = 0; // Number of times the recorded preempt_count decreased between neighbours in the recent window
+			u32 highered_delay = 0; // Number of times it increased; also incremented once per hard fail in the deviation loop below
+			bool can_reevaluate = true; // Cleared as soon as the recorded preempt_count changes inside the recent window
+			u64 prev_preempt_count = umax; // umax marks "no previous sample seen yet"
+
+			for (usz i = frame_times.size() - 30; i < frame_times.size(); i++) // Recent window: the last 30 samples
+			{
+				if (prev_preempt_count == umax)
+				{
+					prev_preempt_count = frame_times[i].preempt_count; // First sample of the window only seeds the comparison
+					continue;
+				}
+
+				if (prev_preempt_count != frame_times[i].preempt_count)
+				{
+					if (prev_preempt_count > frame_times[i].preempt_count)
+					{
+						lowered_delay++;
+					}
+					else if (prev_preempt_count < frame_times[i].preempt_count)
+					{
+						highered_delay++;
+					}
+
+					if (i > frame_times.size() - 30) // Holds whenever this point is reached: the first window iteration took the continue above
+					{
+						// The count changed recently, so skip the regular re-evaluation (keeps preemption count increases slow)
+						can_reevaluate = false;
+					}
+				}
+
+				prev_preempt_count = frame_times[i].preempt_count;
+			}
+
+			preempt_count = frame_times.back().preempt_count; // Adjust starting from the most recently recorded count
+
+			u32 fails = 0; // Intervals deviating from the average by at least a quarter of it
+			u32 hard_fails = 0; // Intervals deviating from the average by at least half of it
+			bool is_last_frame_a_fail = false; // Set when the newest interval (last sample -> now) is a hard fail
+
+			auto abs_dst = [](u64 a, u64 b) // Absolute difference of two unsigned values, without wrap-around
+			{
+				return a >= b ? a - b : b - a;
+			};
+
+			for (u32 i = 1; i <= frame_times.size(); i++) // i == size() denotes the interval from the last recorded sample to current_time
+			{
+				const u64 cur_diff = (i == frame_times.size() ? current_time : frame_times[i].timestamp) - frame_times[i - 1].timestamp;
+
+				if (const u64 diff_of_diff = abs_dst(cur_diff, avg_frame_time);
+					diff_of_diff >= avg_frame_time / 4)
+				{
+					if (diff_of_diff >= avg_frame_time / 2)
+					{
+						highered_delay++;
+						hard_fails++;
+
+						if (i == frame_times.size())
+						{
+							is_last_frame_a_fail = true;
+						}
+					}
+
+					if (fails != umax) // Saturating increment
+					{
+						fails++;
+					}
+				}
+			}
+
+			bool hard_measures_taken = false;
+			const usz fps_10 = 10'000'000 / avg_frame_time; // Presumably FPS x10, assuming timestamps are in microseconds - TODO confirm against get_system_time. NOTE(review): divides by zero if avg_frame_time truncates to 0
+
+			auto lower_preemption_count = [&]() // Backs the count off and arms the tickets that delay the next increase
+			{
+				if (preempt_count >= 10) // Subtract 10, saturating at 0
+				{
+					preempt_count -= 10;
+				}
+				else
+				{
+					preempt_count = 0;
+				}
+
+				if (hard_fails > 2 && is_last_frame_a_fail) // Severe case: additionally scale the count down to 7/8
+				{
+					hard_measures_taken = preempt_count > 1; // Sampled before the 7/8 scaling below
+					preempt_count = preempt_count * 7 / 8;
+					prevent_preempt_increase_tickets = 10;
+				}
+				else
+				{
+					prevent_preempt_increase_tickets = std::max<u32>(7, prevent_preempt_increase_tickets); // Keep at least 7 tickets
+				}
+			};
+
+			if (can_reevaluate)
+			{
+				const bool is_avg_fps_ok = (abs_dst(fps_10, 300) < 3 || abs_dst(fps_10, 600) < 4 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 4 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 3); // fps_10 close to 300, 600, vblank_rate * 10 or half of the latter
+
+				if (!hard_fails && fails < 6 && is_avg_fps_ok)
+				{
+					if (prevent_preempt_increase_tickets) // Spend one ticket per stable evaluation before any increase is allowed
+					{
+						prevent_preempt_increase_tickets--;
+					}
+					else if (preempt_count < max_preempt_count)
+					{
+						preempt_count += 4; // NOTE(review): may overshoot max_preempt_count by up to 3 - confirm this is intended
+					}
+				}
+				else
+				{
+					lower_preemption_count();
+				}
+			}
+			// Sudden FPS drop detection: after a recent count change, lower only if frame pacing is clearly off, increases outnumber decreases and the newest interval hard-failed
+			else if ((fails > 10 || hard_fails > 2 || !(abs_dst(fps_10, 300) < 20 || abs_dst(fps_10, 600) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 20 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 30)) && lowered_delay < highered_delay && is_last_frame_a_fail)
+			{
+				lower_preemption_count();
+			}
+
+			perf_log.trace("CPU preemption control: reeval=%d, preempt_count=%d, fails=%d, hard=%d, avg_frame_time=%d, highered=%d, lowered=%d", can_reevaluate, preempt_count, fails, hard_fails, avg_frame_time, highered_delay, lowered_delay);
+
+			if (hard_measures_taken)
+			{
+				preempt_fail_old_preempt_count = std::max<u32>(preempt_fail_old_preempt_count, frame_times.back().preempt_count); // Remember the highest count recorded just before a hard reduction
+			}
+			else if (preempt_fail_old_preempt_count) // First evaluation without a hard reduction after one or more: report once, then reset
+			{
+				perf_log.error("Lowering current preemption count significantly due to a performance drop, if this issue persists frequantly consider lowering max preemptions count to 'new-count' or lower. (old-count=%d, new-count=%d)", preempt_fail_old_preempt_count, preempt_count);
+				preempt_fail_old_preempt_count = 0;
+			}
+
+			const u64 tsc_diff = (current_tsc - frame_times.back().tsc); // TSC ticks elapsed since the last recorded sample
+
+			// Pass an upper TSC limit so that a backoff is applied if performance suddenly drops
+			// Leave the last 6% of the measured span without yields to significantly reduce the risk of stutter
+			lv2_obj::set_yield_frequency(preempt_count ? tsc_diff / preempt_count : 0, current_tsc + (tsc_diff * 94 / 100)); // First argument: tsc_diff split evenly across preempt_count (0 when the count is 0)
+			frame_times.pop_front(); // Drop the oldest sample; the push_back below adds the new one
+		}
+		else
+		{
+			lv2_obj::set_yield_frequency(0, 0); // Fewer than 60 samples so far: same reset call as the zero-limit path
+		}
+
+		frame_times.push_back(frame_time_t{preempt_count, current_time, current_tsc}); // Record this call's preempt_count, system time and TSC
 	}
 }