mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 22:41:25 +12:00
utils/sysinfo.cpp: New TSC calibration technique
This commit is contained in:
parent
af052b0627
commit
84217917d5
1 changed files with 61 additions and 34 deletions
|
@ -736,7 +736,7 @@ bool utils::get_low_power_mode()
|
||||||
|
|
||||||
static constexpr ullong round_tsc(ullong val)
|
static constexpr ullong round_tsc(ullong val)
|
||||||
{
|
{
|
||||||
return utils::rounded_div(val, 1'000'000) * 1'000'000;
|
return utils::rounded_div(val, 100'000) * 100'000;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace utils
|
namespace utils
|
||||||
|
@ -744,7 +744,7 @@ namespace utils
|
||||||
u64 s_tsc_freq = 0;
|
u64 s_tsc_freq = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
named_thread<std::function<void()>> s_thread_evaluate_tsc_freq("TSX Evaluate Thread", []()
|
named_thread<std::function<void()>> s_thread_evaluate_tsc_freq("TSC Evaluate Thread", []()
|
||||||
{
|
{
|
||||||
static const ullong cal_tsc = []() -> ullong
|
static const ullong cal_tsc = []() -> ullong
|
||||||
{
|
{
|
||||||
|
@ -767,56 +767,83 @@ named_thread<std::function<void()>> s_thread_evaluate_tsc_freq("TSX Evaluate Thr
|
||||||
|
|
||||||
const ullong timer_freq = freq.QuadPart;
|
const ullong timer_freq = freq.QuadPart;
|
||||||
#else
|
#else
|
||||||
const ullong timer_freq = 1'000'000'000;
|
constexpr ullong timer_freq = 1'000'000'000;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Calibrate TSC
|
constexpr u64 retry_count = 1000;
|
||||||
constexpr int samples = 60;
|
|
||||||
ullong rdtsc_data[samples];
|
|
||||||
ullong timer_data[samples];
|
|
||||||
[[maybe_unused]] ullong error_data[samples];
|
|
||||||
|
|
||||||
// Narrow thread affinity to a single core
|
// First entry is for the onset measurements, last is for the end measurements
|
||||||
const u64 old_aff = thread_ctrl::get_thread_affinity_mask();
|
constexpr usz sample_count = 2;
|
||||||
thread_ctrl::set_thread_affinity_mask(old_aff & (0 - old_aff));
|
std::array<u64, sample_count> rdtsc_data{};
|
||||||
|
std::array<u64, sample_count> rdtsc_diff{};
|
||||||
|
std::array<u64, sample_count> timer_data{};
|
||||||
|
|
||||||
#ifndef _WIN32
|
#ifdef _WIN32
|
||||||
|
LARGE_INTEGER ctr0;
|
||||||
|
QueryPerformanceCounter(&ctr0);
|
||||||
|
const ullong time_base = ctr0.QuadPart;
|
||||||
|
#else
|
||||||
struct timespec ts0;
|
struct timespec ts0;
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts0);
|
clock_gettime(CLOCK_MONOTONIC, &ts0);
|
||||||
ullong sec_base = ts0.tv_sec;
|
const ullong sec_base = ts0.tv_sec;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (int i = 0; i < samples; i++)
|
for (usz sample = 0; sample < sample_count; sample++)
|
||||||
{
|
{
|
||||||
|
for (usz i = 0; i < retry_count; i++)
|
||||||
|
{
|
||||||
|
const u64 rdtsc_read = (utils::lfence(), utils::get_tsc());
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
Sleep(2);
|
LARGE_INTEGER ctr;
|
||||||
error_data[i] = (utils::lfence(), utils::get_tsc());
|
QueryPerformanceCounter(&ctr);
|
||||||
LARGE_INTEGER ctr;
|
|
||||||
QueryPerformanceCounter(&ctr);
|
|
||||||
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
|
|
||||||
timer_data[i] = ctr.QuadPart;
|
|
||||||
#else
|
#else
|
||||||
usleep(500);
|
struct timespec ts;
|
||||||
error_data[i] = (utils::lfence(), utils::get_tsc());
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
struct timespec ts;
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
||||||
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
|
|
||||||
timer_data[i] = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
|
|
||||||
#endif
|
#endif
|
||||||
|
const u64 rdtsc_read2 = (utils::lfence(), utils::get_tsc());
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
const u64 timer_read = ctr.QuadPart - time_base;
|
||||||
|
#else
|
||||||
|
const u64 timer_read = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (i == 0 || (rdtsc_read2 >= rdtsc_read && rdtsc_read2 - rdtsc_read < rdtsc_diff[sample]))
|
||||||
|
{
|
||||||
|
rdtsc_data[sample] = rdtsc_read; // Note: rdtsc_read2 can also be written here because of the assumption of accuracy
|
||||||
|
timer_data[sample] = timer_read;
|
||||||
|
rdtsc_diff[sample] = rdtsc_read2 >= rdtsc_read ? rdtsc_read2 - rdtsc_read : u64{umax};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rdtsc_read2 - rdtsc_read < std::min<usz>(i, 300) && rdtsc_read2 >= rdtsc_read)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sample < sample_count - 1)
|
||||||
|
{
|
||||||
|
// Sleep 20ms between first and last sample
|
||||||
|
#ifdef _WIN32
|
||||||
|
Sleep(20);
|
||||||
|
#else
|
||||||
|
usleep(20'000);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore main thread affinity
|
if (timer_data[1] == timer_data[0])
|
||||||
thread_ctrl::set_thread_affinity_mask(old_aff);
|
|
||||||
|
|
||||||
// Compute average TSC
|
|
||||||
ullong acc = 0;
|
|
||||||
for (int i = 0; i < samples - 1; i++)
|
|
||||||
{
|
{
|
||||||
acc += (rdtsc_data[i + 1] - rdtsc_data[i]) * timer_freq / (timer_data[i + 1] - timer_data[i]);
|
// Division by zero
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u128 data = u128_from_mul(rdtsc_data[1] - rdtsc_data[0], timer_freq);
|
||||||
|
|
||||||
|
const u64 res = utils::udiv128(static_cast<u64>(data >> 64), static_cast<u64>(data), (timer_data[1] - timer_data[0]));
|
||||||
|
|
||||||
// Rounding
|
// Rounding
|
||||||
return round_tsc(acc / (samples - 1));
|
return round_tsc(res);
|
||||||
}();
|
}();
|
||||||
|
|
||||||
atomic_storage<u64>::release(utils::s_tsc_freq, cal_tsc);
|
atomic_storage<u64>::release(utils::s_tsc_freq, cal_tsc);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue