Implement independent CPU preemptions

This commit is contained in:
Eladash 2022-09-06 18:59:23 +03:00 committed by kd-11
parent b9e20dc5c9
commit ec7b18dab5
12 changed files with 358 additions and 4 deletions

View file

@ -2,6 +2,7 @@
#include "RSXThread.h"
#include "Emu/Cell/PPUCallback.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/timers.hpp"
#include "Common/BufferUtils.h"
@ -42,6 +43,8 @@ rsx::frame_capture_data frame_capture;
extern CellGcmOffsetTable offsetTable;
extern thread_local std::string(*g_tls_log_prefix)();
LOG_CHANNEL(perf_log, "PERF");
template <>
bool serialize<rsx::rsx_state>(utils::serial& ar, rsx::rsx_state& o)
{
@ -241,6 +244,17 @@ namespace rsx
fmt::throw_exception("rsx::get_address(offset=0x%x, location=0x%x): %s%s", offset, location, msg, src_loc{line, col, file, func});
}
// Adds cpu_flag::yield to the state of the current renderer.
// Does nothing when there is no current renderer or when the
// "allow_rsx_cpu_preempt" core setting is off.
extern void set_rsx_yield_flag() noexcept
{
	auto renderer = get_current_renderer();

	if (!renderer)
	{
		// No renderer to signal
		return;
	}

	if (!g_cfg.core.allow_rsx_cpu_preempt)
	{
		// RSX preemption is not permitted by configuration
		return;
	}

	renderer->state += cpu_flag::yield;
}
std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count) const
{
if (single_vertex)
@ -3373,7 +3387,7 @@ namespace rsx
switch (frame_limit)
{
case frame_limit_type::none: limit = 0.; break;
case frame_limit_type::none: limit = g_cfg.core.max_cpu_preempt_count_per_frame ? static_cast<double>(g_cfg.video.vblank_rate) : 0.; break;
case frame_limit_type::_50: limit = 50.; break;
case frame_limit_type::_60: limit = 60.; break;
case frame_limit_type::_30: limit = 30.; break;
@ -3478,5 +3492,183 @@ namespace rsx
intr_thread->cmd_notify.notify_one();
}
}
evaluate_cpu_usage_reduction_limits();
}
void thread::evaluate_cpu_usage_reduction_limits()
{
const u64 max_preempt_count = g_cfg.core.max_cpu_preempt_count_per_frame;
if (!max_preempt_count)
{
frame_times.clear();
lv2_obj::set_yield_frequency(0, 0);
return;
}
const u64 current_time = get_system_time();
const u64 current_tsc = utils::get_tsc();
u64 preempt_count = 0;
if (frame_times.size() >= 60)
{
u64 diffs = 0;
for (usz i = 1; i < frame_times.size(); i++)
{
const u64 cur_diff = frame_times[i].timestamp - frame_times[i - 1].timestamp;
diffs += cur_diff;
}
const usz avg_frame_time = diffs / 59;
u32 lowered_delay = 0;
u32 highered_delay = 0;
bool can_reevaluate = true;
u64 prev_preempt_count = umax;
for (usz i = frame_times.size() - 30; i < frame_times.size(); i++)
{
if (prev_preempt_count == umax)
{
prev_preempt_count = frame_times[i].preempt_count;
continue;
}
if (prev_preempt_count != frame_times[i].preempt_count)
{
if (prev_preempt_count > frame_times[i].preempt_count)
{
lowered_delay++;
}
else if (prev_preempt_count < frame_times[i].preempt_count)
{
highered_delay++;
}
if (i > frame_times.size() - 30)
{
// Slow preemption count increase
can_reevaluate = false;
}
}
prev_preempt_count = frame_times[i].preempt_count;
}
preempt_count = frame_times.back().preempt_count;
u32 fails = 0;
u32 hard_fails = 0;
bool is_last_frame_a_fail = false;
auto abs_dst = [](u64 a, u64 b)
{
return a >= b ? a - b : b - a;
};
for (u32 i = 1; i <= frame_times.size(); i++)
{
const u64 cur_diff = (i == frame_times.size() ? current_time : frame_times[i].timestamp) - frame_times[i - 1].timestamp;
if (const u64 diff_of_diff = abs_dst(cur_diff, avg_frame_time);
diff_of_diff >= avg_frame_time / 4)
{
if (diff_of_diff >= avg_frame_time / 2)
{
highered_delay++;
hard_fails++;
if (i == frame_times.size())
{
is_last_frame_a_fail = true;
}
}
if (fails != umax)
{
fails++;
}
}
}
bool hard_measures_taken = false;
const usz fps_10 = 10'000'000 / avg_frame_time;
auto lower_preemption_count = [&]()
{
if (preempt_count >= 10)
{
preempt_count -= 10;
}
else
{
preempt_count = 0;
}
if (hard_fails > 2 && is_last_frame_a_fail)
{
hard_measures_taken = preempt_count > 1;
preempt_count = preempt_count * 7 / 8;
prevent_preempt_increase_tickets = 10;
}
else
{
prevent_preempt_increase_tickets = std::max<u32>(7, prevent_preempt_increase_tickets);
}
};
if (can_reevaluate)
{
const bool is_avg_fps_ok = (abs_dst(fps_10, 300) < 3 || abs_dst(fps_10, 600) < 4 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 4 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 3);
if (!hard_fails && fails < 6 && is_avg_fps_ok)
{
if (prevent_preempt_increase_tickets)
{
prevent_preempt_increase_tickets--;
}
else if (preempt_count < max_preempt_count)
{
preempt_count += 4;
}
}
else
{
lower_preemption_count();
}
}
// Sudden FPS drop detection
else if ((fails > 10 || hard_fails > 2 || !(abs_dst(fps_10, 300) < 20 || abs_dst(fps_10, 600) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 20 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 30)) && lowered_delay < highered_delay && is_last_frame_a_fail)
{
lower_preemption_count();
}
perf_log.trace("CPU preemption control: reeval=%d, preempt_count=%d, fails=%d, hard=%d, avg_frame_time=%d, highered=%d, lowered=%d", can_reevaluate, preempt_count, fails, hard_fails, avg_frame_time, highered_delay, lowered_delay);
if (hard_measures_taken)
{
preempt_fail_old_preempt_count = std::max<u32>(preempt_fail_old_preempt_count, frame_times.back().preempt_count);
}
else if (preempt_fail_old_preempt_count)
{
perf_log.error("Lowering current preemption count significantly due to a performance drop, if this issue persists frequantly consider lowering max preemptions count to 'new-count' or lower. (old-count=%d, new-count=%d)", preempt_fail_old_preempt_count, preempt_count);
preempt_fail_old_preempt_count = 0;
}
const u64 tsc_diff = (current_tsc - frame_times.back().tsc);
// Set an upper limit so a backoff technique would be taken if there is a sudden performance drop
// Allow 6% of no yield to reduce significantly the risk of stutter
lv2_obj::set_yield_frequency(preempt_count ? tsc_diff / preempt_count : 0, current_tsc + (tsc_diff * 94 / 100));
frame_times.pop_front();
}
else
{
lv2_obj::set_yield_frequency(0, 0);
}
frame_times.push_back(frame_time_t{preempt_count, current_time, current_tsc});
}
}