spu: Add polling loop detection and clean up channel access contention code
- SPUs run a tight, GPU-style kernel with no multitasking on the cores themselves. This does not map well to PC processor cores, because the emulated SPU threads never sleep even when doing nothing. The poll-detection hack tries to find a good place to insert a scheduler yield; RdDec is a good spot, since reading the decrementer signifies that the SPU kernel is waiting on a timer.
parent 05ffb50037
commit cbd1b28d0d
2 changed files with 31 additions and 8 deletions
@@ -72,18 +72,27 @@ namespace spu
 	//TODO: Only initialize loaded memory blocks to save RAM
 	//TODO: Concurrent spu thread limit can be configurable
 	std::array<executable_block_map, 65536> atomic_instruction_table;
-	constexpr u8 max_concurrent_instructions = 1;
+	constexpr u32 native_jiffy_duration_us = 2000000;
 
 	void acquire_pc_address(u32 pc, u32 timeout_ms = 3)
 	{
+		const u8 max_concurrent_instructions = (u8)g_cfg.core.preferred_spu_threads;
+
 		const u32 block = pc >> 12;
 		const u32 offset = (pc & 0xFFF) >> 2;
 
+		if (timeout_ms > 0)
+		{
 			while (timeout_ms--)
 			{
 				if (atomic_instruction_table[block][offset].load(std::memory_order_consume) >= max_concurrent_instructions)
 					std::this_thread::sleep_for(1ms);
 			}
+		}
+		else
+		{
+			std::this_thread::yield();
+		}
 
 		atomic_instruction_table[block][offset]++;
 	}
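The executable_block_map type and the matching release_pc_address are not part of this diff. For readers outside the codebase, here is a minimal standalone model of the contention gate, assuming the table holds one atomic counter per 4-byte instruction slot of each 4 KiB block and that release simply decrements what acquire incremented; it is a sketch of the idea, not the emulator's actual types.

// Standalone model of the per-PC contention gate. Assumptions: executable_block_map
// is one atomic counter per 4-byte instruction slot of a 4 KiB block, and the
// (unshown) release_pc_address simply decrements the counter acquire incremented.
#include <array>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <thread>

using namespace std::chrono_literals;

// One counter per instruction slot: 4096 bytes / 4 bytes per instruction = 1024 slots.
using executable_block_map = std::array<std::atomic<std::uint8_t>, 1024>;

std::array<executable_block_map, 65536> atomic_instruction_table;

void acquire_pc_address(std::uint32_t pc, std::uint8_t max_concurrent, std::uint32_t timeout_ms = 3)
{
	const std::uint32_t block  = pc >> 12;          // 4 KiB block index
	const std::uint32_t offset = (pc & 0xFFF) >> 2; // instruction slot within the block

	if (timeout_ms > 0)
	{
		// Back off in 1 ms steps while too many threads sit on this address,
		// giving up after timeout_ms attempts.
		while (timeout_ms-- && atomic_instruction_table[block][offset].load() >= max_concurrent)
			std::this_thread::sleep_for(1ms);
	}
	else
	{
		// No budget to wait: just hint the scheduler and take the slot anyway.
		std::this_thread::yield();
	}

	atomic_instruction_table[block][offset]++;
}

void release_pc_address(std::uint32_t pc)
{
	atomic_instruction_table[pc >> 12][(pc & 0xFFF) >> 2]--;
}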
@@ -99,15 +108,21 @@ namespace spu
 	struct concurrent_execution_watchdog
 	{
 		u32 pc = 0;
+		bool active = false;
 
 		concurrent_execution_watchdog(SPUThread& spu)
 			:pc(spu.pc)
 		{
-			acquire_pc_address(pc);
+			if (g_cfg.core.preferred_spu_threads > 0)
+			{
+				acquire_pc_address(pc, (u32)g_cfg.core.spu_delay_penalty);
+				active = true;
+			}
 		}
 
 		~concurrent_execution_watchdog()
 		{
+			if (active)
 				release_pc_address(pc);
 		}
 	};
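The diff does not show a call site for the watchdog. A usage sketch, continuing the standalone model above (pc_gate and run_one_instruction are illustrative stand-ins for spu::concurrent_execution_watchdog and the emulator's instruction handler, not its API): the guard is taken for one emulated instruction and the counter is released on scope exit, even on early return or exception.

// Usage sketch built on the model above. pc_gate stands in for
// spu::concurrent_execution_watchdog; names and parameters are illustrative.
#include <cstdint>

void acquire_pc_address(std::uint32_t pc, std::uint8_t max_concurrent, std::uint32_t timeout_ms);
void release_pc_address(std::uint32_t pc);

struct pc_gate
{
	std::uint32_t pc;
	bool active = false;

	pc_gate(std::uint32_t pc_, std::uint8_t preferred_threads, std::uint32_t delay_penalty_ms)
		: pc(pc_)
	{
		// Mirrors the diff: the gate is a no-op when "Preferred SPU Threads" is 0.
		if (preferred_threads > 0)
		{
			acquire_pc_address(pc, preferred_threads, delay_penalty_ms);
			active = true;
		}
	}

	~pc_gate()
	{
		if (active)
			release_pc_address(pc);
	}
};

void run_one_instruction(std::uint32_t pc)
{
	pc_gate gate(pc, /*preferred_threads=*/2, /*delay_penalty_ms=*/3);
	// ... decode and execute the instruction at pc ...
} // counter released here, even on early return or exception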
@@ -559,7 +574,7 @@ void SPUThread::process_mfc_cmd()
 			}
 
 			// TODO: investigate lost notifications
-			std::this_thread::sleep_for(0us);
+			std::this_thread::yield();
 			_mm_lfence();
 		}
 	};
@@ -957,7 +972,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
 		if (ctr > 10000)
 		{
 			ctr = 0;
-			std::this_thread::sleep_for(0us);
+			std::this_thread::yield();
 		}
 		else
 		{
@@ -1038,6 +1053,11 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
 	case SPU_RdDec:
 	{
 		out = ch_dec_value - (u32)(get_timebased_time() - ch_dec_start_timestamp);
+
+		//Polling: We might as well hint to the scheduler to slot in another thread since this one is counting down
+		if (g_cfg.core.spu_loop_detection && out > spu::scheduler::native_jiffy_duration_us)
+			std::this_thread::yield();
+
 		return true;
 	}
 
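The RdDec change above is the heart of the loop detection. A minimal, self-contained sketch of the heuristic, assuming the decrementer counts down from ch_dec_value starting at ch_dec_start_timestamp; read_decrementer and now_timestamp are stand-ins for the emulator's channel read and get_timebased_time(), not its API.

// Minimal sketch of the RdDec polling heuristic. If the remaining count still
// exceeds the native_jiffy_duration_us threshold (2,000,000), the SPU kernel is
// presumably just waiting on its timer, so the host thread yields instead of
// burning a core in a tight read-decrementer loop.
#include <cstdint>
#include <thread>

constexpr std::uint32_t native_jiffy_duration_us = 2000000;

std::uint32_t read_decrementer(std::uint32_t ch_dec_value,
                               std::uint64_t ch_dec_start_timestamp,
                               std::uint64_t now_timestamp,
                               bool spu_loop_detection)
{
	// Same arithmetic as the diff: elapsed time since the decrementer was set,
	// subtracted from its starting value.
	const std::uint32_t out = ch_dec_value - (std::uint32_t)(now_timestamp - ch_dec_start_timestamp);

	// Polling hint: let the OS scheduler slot in another thread on this core.
	if (spu_loop_detection && out > native_jiffy_duration_us)
		std::this_thread::yield();

	return out;
}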
@@ -272,6 +272,9 @@ struct cfg_root : cfg::node
 	cfg::_bool lower_spu_priority{this, "Lower SPU thread priority"};
 	cfg::_bool spu_debug{this, "SPU Debug"};
 	cfg::_int<32, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC
+	cfg::_int<0, 6> preferred_spu_threads{this, "Preferred SPU Threads", 0}; // Number of hardware threads dedicated to heavy simultaneous spu tasks
+	cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; // Number of milliseconds to block a thread if a virtual 'core' isn't free
+	cfg::_bool spu_loop_detection{this, "SPU loop detection", false}; // Try to detect wait loops and trigger thread yield
 
 	cfg::_enum<lib_loading_type> lib_loading{this, "Lib Loader", lib_loading_type::automatic};
 	cfg::_bool hook_functions{this, "Hook static functions"};