mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 00:11:24 +12:00
spu: acquire_pc rewritten to be more precise in timing
- More accurate pauses may slightly reduce performance, but they make it more viable to allow more threads to participate without stuttering
This commit is contained in:
parent
dd19622823
commit
17c399d4e8
1 changed files with 26 additions and 9 deletions
|
@ -33,6 +33,7 @@ bool operator ==(const u128& lhs, const u128& rhs)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern u64 get_timebased_time();
|
extern u64 get_timebased_time();
|
||||||
|
extern u64 get_system_time();
|
||||||
|
|
||||||
extern thread_local u64 g_tls_fault_spu;
|
extern thread_local u64 g_tls_fault_spu;
|
||||||
|
|
||||||
|
@ -63,24 +64,40 @@ namespace spu
|
||||||
namespace scheduler
|
namespace scheduler
|
||||||
{
|
{
|
||||||
std::array<std::atomic<u8>, 65536> atomic_instruction_table = {};
|
std::array<std::atomic<u8>, 65536> atomic_instruction_table = {};
|
||||||
constexpr u32 native_jiffy_duration_us = 2000000;
|
constexpr u32 native_jiffy_duration_us = 1500; //About 1ms resolution with a half offset
|
||||||
|
|
||||||
void acquire_pc_address(u32 pc, u32 timeout_ms = 3)
|
void acquire_pc_address(u32 pc, u32 timeout_ms = 3)
|
||||||
{
|
{
|
||||||
const u8 max_concurrent_instructions = (u8)g_cfg.core.preferred_spu_threads;
|
const u8 max_concurrent_instructions = (u8)g_cfg.core.preferred_spu_threads;
|
||||||
const u32 pc_offset = pc >> 2;
|
const u32 pc_offset = pc >> 2;
|
||||||
|
|
||||||
if (timeout_ms > 0)
|
if (atomic_instruction_table[pc_offset].load(std::memory_order_consume) >= max_concurrent_instructions)
|
||||||
{
|
{
|
||||||
while (timeout_ms--)
|
if (timeout_ms > 0)
|
||||||
{
|
{
|
||||||
if (atomic_instruction_table[pc_offset].load(std::memory_order_consume) >= max_concurrent_instructions)
|
const auto timeout = timeout_ms * 1000u; //convert to microseconds
|
||||||
std::this_thread::sleep_for(1ms);
|
const auto start = get_system_time();
|
||||||
|
auto remaining = timeout;
|
||||||
|
|
||||||
|
while (atomic_instruction_table[pc_offset].load(std::memory_order_consume) >= max_concurrent_instructions)
|
||||||
|
{
|
||||||
|
if (remaining >= native_jiffy_duration_us)
|
||||||
|
std::this_thread::sleep_for(1ms);
|
||||||
|
else
|
||||||
|
std::this_thread::yield();
|
||||||
|
|
||||||
|
const auto now = get_system_time();
|
||||||
|
const auto elapsed = now - start;
|
||||||
|
|
||||||
|
if (elapsed > timeout) break;
|
||||||
|
remaining = timeout - elapsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//Slight pause if function is overburdened
|
||||||
|
thread_ctrl::wait_for(100);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::this_thread::yield();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
atomic_instruction_table[pc_offset]++;
|
atomic_instruction_table[pc_offset]++;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue