mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 06:21:26 +12:00
SPU: Use usermode waiting for busy GETLLAR loop
This commit is contained in:
parent
dddd12f66b
commit
6adc7f9ee6
3 changed files with 78 additions and 1 deletions
|
@ -37,6 +37,15 @@
|
||||||
#include "util/sysinfo.hpp"
|
#include "util/sysinfo.hpp"
|
||||||
#include "util/serialization.hpp"
|
#include "util/serialization.hpp"
|
||||||
|
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#include <immintrin.h>
|
||||||
|
#else
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
using spu_rdata_t = decltype(spu_thread::rdata);
|
using spu_rdata_t = decltype(spu_thread::rdata);
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
@ -320,6 +329,40 @@ extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Function decorators for the usermode-wait helpers.
// On non-MSVC compilers they expand to a target attribute naming the required
// instruction-set extension; under MSVC they expand to nothing.
#if !defined(_MSC_VER)
#define mwaitx_func __attribute__((__target__("mwaitx")))
#define waitpkg_func __attribute__((__target__("waitpkg")))
#else
#define mwaitx_func
#define waitpkg_func
#endif
|
||||||
|
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
// Waits for a number of TSC clock cycles in power optimized state
|
||||||
|
// Cstate is represented in bits [7:4]+1 cstate. So C0 requires bits [7:4] to be set to 0xf, C1 requires bits [7:4] to be set to 0.
|
||||||
|
template <typename T, typename... Args>
|
||||||
|
mwaitx_func static void __mwaitx(u32 cycles, u32 cstate, const void* cline, const Args&... args)
|
||||||
|
{
|
||||||
|
constexpr u32 timer_enable = 0x2;
|
||||||
|
|
||||||
|
// monitorx will wake if the cache line is written to, use it for reservations which fits it almost perfectly
|
||||||
|
_mm_monitorx(const_cast<void*>(cline), 0, 0);
|
||||||
|
|
||||||
|
// Use static function to force inline
|
||||||
|
if (T::needs_wait(args...))
|
||||||
|
{
|
||||||
|
_mm_mwaitx(timer_enable, cstate, cycles);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01 state (higher power)
|
||||||
|
waitpkg_func static void __tpause(u32 cycles, u32 cstate)
|
||||||
|
{
|
||||||
|
const u64 tsc = utils::get_tsc() + cycles;
|
||||||
|
_tpause(cstate, tsc);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
||||||
|
|
||||||
extern thread_local u64 g_tls_fault_spu;
|
extern thread_local u64 g_tls_fault_spu;
|
||||||
|
@ -4113,7 +4156,32 @@ bool spu_thread::process_mfc_cmd()
|
||||||
|
|
||||||
if (getllar_busy_waiting_switch == 1)
|
if (getllar_busy_waiting_switch == 1)
|
||||||
{
|
{
|
||||||
busy_wait(300);
|
#if defined(ARCH_X64)
|
||||||
|
if (utils::has_um_wait())
|
||||||
|
{
|
||||||
|
if (utils::has_waitpkg())
|
||||||
|
{
|
||||||
|
__tpause(std::min<u32>(getllar_spin_count, 10) * 500, 0x1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
struct check_wait_t
|
||||||
|
{
|
||||||
|
static FORCE_INLINE bool needs_wait(u64 rtime, const atomic_t<u64>& mem_rtime) noexcept
|
||||||
|
{
|
||||||
|
return rtime == mem_rtime;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Provide the first X64 cache line of the reservation to be tracked
|
||||||
|
__mwaitx<check_wait_t>(std::min<u32>(getllar_spin_count, 17) * 500, 0xf0, std::addressof(data), +rtime, vm::reservation_acquire(addr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
busy_wait(300);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -358,6 +358,13 @@ bool utils::has_appropriate_um_wait()
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Similar to the above function but allow execution if alternatives such as yield are not wanted
|
||||||
|
bool utils::has_um_wait()
|
||||||
|
{
|
||||||
|
static const bool g_value = (has_waitx() || has_waitpkg()) && get_tsc_freq();
|
||||||
|
return g_value;
|
||||||
|
}
|
||||||
|
|
||||||
u32 utils::get_rep_movsb_threshold()
|
u32 utils::get_rep_movsb_threshold()
|
||||||
{
|
{
|
||||||
static const u32 g_value = []()
|
static const u32 g_value = []()
|
||||||
|
|
|
@ -59,6 +59,8 @@ namespace utils
|
||||||
|
|
||||||
bool has_appropriate_um_wait();
|
bool has_appropriate_um_wait();
|
||||||
|
|
||||||
|
// True when waitx or waitpkg is available and get_tsc_freq() is non-zero
bool has_um_wait();
|
||||||
|
|
||||||
std::string get_cpu_brand();
|
std::string get_cpu_brand();
|
||||||
|
|
||||||
std::string get_system_info();
|
std::string get_system_info();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue