mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-10 17:01:24 +12:00
Implement independent CPU preemptions
This commit is contained in:
parent
b9e20dc5c9
commit
ec7b18dab5
12 changed files with 358 additions and 4 deletions
|
@ -2,6 +2,7 @@
|
|||
#include "RSXThread.h"
|
||||
|
||||
#include "Emu/Cell/PPUCallback.h"
|
||||
#include "Emu/Cell/SPUThread.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
|
||||
#include "Common/BufferUtils.h"
|
||||
|
@ -42,6 +43,8 @@ rsx::frame_capture_data frame_capture;
|
|||
extern CellGcmOffsetTable offsetTable;
|
||||
extern thread_local std::string(*g_tls_log_prefix)();
|
||||
|
||||
LOG_CHANNEL(perf_log, "PERF");
|
||||
|
||||
template <>
|
||||
bool serialize<rsx::rsx_state>(utils::serial& ar, rsx::rsx_state& o)
|
||||
{
|
||||
|
@ -241,6 +244,17 @@ namespace rsx
|
|||
fmt::throw_exception("rsx::get_address(offset=0x%x, location=0x%x): %s%s", offset, location, msg, src_loc{line, col, file, func});
|
||||
}
|
||||
|
||||
extern void set_rsx_yield_flag() noexcept
|
||||
{
|
||||
if (auto rsx = get_current_renderer())
|
||||
{
|
||||
if (g_cfg.core.allow_rsx_cpu_preempt)
|
||||
{
|
||||
rsx->state += cpu_flag::yield;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count) const
|
||||
{
|
||||
if (single_vertex)
|
||||
|
@ -3373,7 +3387,7 @@ namespace rsx
|
|||
|
||||
switch (frame_limit)
|
||||
{
|
||||
case frame_limit_type::none: limit = 0.; break;
|
||||
case frame_limit_type::none: limit = g_cfg.core.max_cpu_preempt_count_per_frame ? static_cast<double>(g_cfg.video.vblank_rate) : 0.; break;
|
||||
case frame_limit_type::_50: limit = 50.; break;
|
||||
case frame_limit_type::_60: limit = 60.; break;
|
||||
case frame_limit_type::_30: limit = 30.; break;
|
||||
|
@ -3478,5 +3492,183 @@ namespace rsx
|
|||
intr_thread->cmd_notify.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
evaluate_cpu_usage_reduction_limits();
|
||||
}
|
||||
|
||||
void thread::evaluate_cpu_usage_reduction_limits()
|
||||
{
|
||||
const u64 max_preempt_count = g_cfg.core.max_cpu_preempt_count_per_frame;
|
||||
|
||||
if (!max_preempt_count)
|
||||
{
|
||||
frame_times.clear();
|
||||
lv2_obj::set_yield_frequency(0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 current_time = get_system_time();
|
||||
const u64 current_tsc = utils::get_tsc();
|
||||
u64 preempt_count = 0;
|
||||
|
||||
if (frame_times.size() >= 60)
|
||||
{
|
||||
u64 diffs = 0;
|
||||
|
||||
for (usz i = 1; i < frame_times.size(); i++)
|
||||
{
|
||||
const u64 cur_diff = frame_times[i].timestamp - frame_times[i - 1].timestamp;
|
||||
diffs += cur_diff;
|
||||
}
|
||||
|
||||
const usz avg_frame_time = diffs / 59;
|
||||
|
||||
u32 lowered_delay = 0;
|
||||
u32 highered_delay = 0;
|
||||
bool can_reevaluate = true;
|
||||
u64 prev_preempt_count = umax;
|
||||
|
||||
for (usz i = frame_times.size() - 30; i < frame_times.size(); i++)
|
||||
{
|
||||
if (prev_preempt_count == umax)
|
||||
{
|
||||
prev_preempt_count = frame_times[i].preempt_count;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (prev_preempt_count != frame_times[i].preempt_count)
|
||||
{
|
||||
if (prev_preempt_count > frame_times[i].preempt_count)
|
||||
{
|
||||
lowered_delay++;
|
||||
}
|
||||
else if (prev_preempt_count < frame_times[i].preempt_count)
|
||||
{
|
||||
highered_delay++;
|
||||
}
|
||||
|
||||
if (i > frame_times.size() - 30)
|
||||
{
|
||||
// Slow preemption count increase
|
||||
can_reevaluate = false;
|
||||
}
|
||||
}
|
||||
|
||||
prev_preempt_count = frame_times[i].preempt_count;
|
||||
}
|
||||
|
||||
preempt_count = frame_times.back().preempt_count;
|
||||
|
||||
u32 fails = 0;
|
||||
u32 hard_fails = 0;
|
||||
bool is_last_frame_a_fail = false;
|
||||
|
||||
auto abs_dst = [](u64 a, u64 b)
|
||||
{
|
||||
return a >= b ? a - b : b - a;
|
||||
};
|
||||
|
||||
for (u32 i = 1; i <= frame_times.size(); i++)
|
||||
{
|
||||
const u64 cur_diff = (i == frame_times.size() ? current_time : frame_times[i].timestamp) - frame_times[i - 1].timestamp;
|
||||
|
||||
if (const u64 diff_of_diff = abs_dst(cur_diff, avg_frame_time);
|
||||
diff_of_diff >= avg_frame_time / 4)
|
||||
{
|
||||
if (diff_of_diff >= avg_frame_time / 2)
|
||||
{
|
||||
highered_delay++;
|
||||
hard_fails++;
|
||||
|
||||
if (i == frame_times.size())
|
||||
{
|
||||
is_last_frame_a_fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (fails != umax)
|
||||
{
|
||||
fails++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool hard_measures_taken = false;
|
||||
const usz fps_10 = 10'000'000 / avg_frame_time;
|
||||
|
||||
auto lower_preemption_count = [&]()
|
||||
{
|
||||
if (preempt_count >= 10)
|
||||
{
|
||||
preempt_count -= 10;
|
||||
}
|
||||
else
|
||||
{
|
||||
preempt_count = 0;
|
||||
}
|
||||
|
||||
if (hard_fails > 2 && is_last_frame_a_fail)
|
||||
{
|
||||
hard_measures_taken = preempt_count > 1;
|
||||
preempt_count = preempt_count * 7 / 8;
|
||||
prevent_preempt_increase_tickets = 10;
|
||||
}
|
||||
else
|
||||
{
|
||||
prevent_preempt_increase_tickets = std::max<u32>(7, prevent_preempt_increase_tickets);
|
||||
}
|
||||
};
|
||||
|
||||
if (can_reevaluate)
|
||||
{
|
||||
const bool is_avg_fps_ok = (abs_dst(fps_10, 300) < 3 || abs_dst(fps_10, 600) < 4 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 4 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 3);
|
||||
|
||||
if (!hard_fails && fails < 6 && is_avg_fps_ok)
|
||||
{
|
||||
if (prevent_preempt_increase_tickets)
|
||||
{
|
||||
prevent_preempt_increase_tickets--;
|
||||
}
|
||||
else if (preempt_count < max_preempt_count)
|
||||
{
|
||||
preempt_count += 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
lower_preemption_count();
|
||||
}
|
||||
}
|
||||
// Sudden FPS drop detection
|
||||
else if ((fails > 10 || hard_fails > 2 || !(abs_dst(fps_10, 300) < 20 || abs_dst(fps_10, 600) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 20 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 30)) && lowered_delay < highered_delay && is_last_frame_a_fail)
|
||||
{
|
||||
lower_preemption_count();
|
||||
}
|
||||
|
||||
perf_log.trace("CPU preemption control: reeval=%d, preempt_count=%d, fails=%d, hard=%d, avg_frame_time=%d, highered=%d, lowered=%d", can_reevaluate, preempt_count, fails, hard_fails, avg_frame_time, highered_delay, lowered_delay);
|
||||
|
||||
if (hard_measures_taken)
|
||||
{
|
||||
preempt_fail_old_preempt_count = std::max<u32>(preempt_fail_old_preempt_count, frame_times.back().preempt_count);
|
||||
}
|
||||
else if (preempt_fail_old_preempt_count)
|
||||
{
|
||||
perf_log.error("Lowering current preemption count significantly due to a performance drop, if this issue persists frequantly consider lowering max preemptions count to 'new-count' or lower. (old-count=%d, new-count=%d)", preempt_fail_old_preempt_count, preempt_count);
|
||||
preempt_fail_old_preempt_count = 0;
|
||||
}
|
||||
|
||||
const u64 tsc_diff = (current_tsc - frame_times.back().tsc);
|
||||
|
||||
// Set an upper limit so a backoff technique would be taken if there is a sudden performance drop
|
||||
// Allow 6% of no yield to reduce significantly the risk of stutter
|
||||
lv2_obj::set_yield_frequency(preempt_count ? tsc_diff / preempt_count : 0, current_tsc + (tsc_diff * 94 / 100));
|
||||
frame_times.pop_front();
|
||||
}
|
||||
else
|
||||
{
|
||||
lv2_obj::set_yield_frequency(0, 0);
|
||||
}
|
||||
|
||||
frame_times.push_back(frame_time_t{preempt_count, current_time, current_tsc});
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue