mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-07 07:21:25 +12:00
rsx: Improve balancing of the offloader thread
- Use two counters to avoid atomic operations
- Yield instead of sleeping because some games are very sensitive to timing
This commit is contained in:
parent
8249d51aa8
commit
4ff77a8555
4 changed files with 29 additions and 13 deletions
|
@ -1797,7 +1797,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
break;
|
||||
}
|
||||
|
||||
thread_ctrl::wait_for(100);
|
||||
thread_ctrl::wait_for(500);
|
||||
}
|
||||
|
||||
if (test_stopped())
|
||||
|
|
|
@ -4,12 +4,20 @@
|
|||
#include "Emu/System.h"
|
||||
#include "RSXOffload.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
// initialization
|
||||
void dma_manager::init()
|
||||
{
|
||||
m_worker_state = thread_state::created;
|
||||
m_enqueued_count.store(0);
|
||||
m_processed_count = 0;
|
||||
|
||||
// Empty work queue in case of stale contents
|
||||
m_work_queue.pop_all();
|
||||
|
||||
thread_ctrl::spawn("RSX offloader", [this]()
|
||||
{
|
||||
if (!g_cfg.video.multithreaded_rsx)
|
||||
|
@ -25,7 +33,7 @@ namespace rsx
|
|||
|
||||
while (m_worker_state != thread_state::finished)
|
||||
{
|
||||
if (m_jobs_count)
|
||||
if (m_enqueued_count.load() != m_processed_count)
|
||||
{
|
||||
for (auto slice = m_work_queue.pop_all(); slice; slice.pop_front())
|
||||
{
|
||||
|
@ -49,16 +57,17 @@ namespace rsx
|
|||
fmt::throw_exception("Unreachable" HERE);
|
||||
}
|
||||
|
||||
m_jobs_count--;
|
||||
++m_processed_count;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
thread_ctrl::wait_for(500);
|
||||
// Yield
|
||||
std::this_thread::yield();
|
||||
}
|
||||
}
|
||||
|
||||
m_jobs_count.store(0);
|
||||
m_processed_count = m_enqueued_count.load();
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -71,7 +80,7 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
++m_jobs_count;
|
||||
++m_enqueued_count;
|
||||
m_work_queue.push(dst, src, length);
|
||||
}
|
||||
}
|
||||
|
@ -84,7 +93,7 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
++m_jobs_count;
|
||||
++m_enqueued_count;
|
||||
m_work_queue.push(dst, src, length);
|
||||
}
|
||||
}
|
||||
|
@ -99,7 +108,7 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
++m_jobs_count;
|
||||
++m_enqueued_count;
|
||||
m_work_queue.push(dst, primitive, count);
|
||||
}
|
||||
}
|
||||
|
@ -107,11 +116,14 @@ namespace rsx
|
|||
// Synchronization
|
||||
void dma_manager::sync()
|
||||
{
|
||||
if (g_cfg.video.multithreaded_rsx)
|
||||
if (LIKELY(m_enqueued_count.load() == m_processed_count))
|
||||
{
|
||||
while (m_jobs_count)
|
||||
_mm_lfence();
|
||||
// Nothing to do
|
||||
return;
|
||||
}
|
||||
|
||||
while (m_enqueued_count.load() != m_processed_count)
|
||||
_mm_lfence();
|
||||
}
|
||||
|
||||
void dma_manager::join()
|
||||
|
|
|
@ -42,8 +42,9 @@ namespace rsx
|
|||
};
|
||||
|
||||
lf_queue<transport_packet> m_work_queue;
|
||||
atomic_t<int> m_jobs_count;
|
||||
thread_state m_worker_state;
|
||||
atomic_t<u64> m_enqueued_count{ 0 };
|
||||
volatile u64 m_processed_count = 0;
|
||||
thread_state m_worker_state = thread_state::detached;
|
||||
|
||||
// TODO: Improved benchmarks here; value determined by profiling on a Ryzen CPU, rounded to the nearest 512 bytes
|
||||
const u32 max_immediate_transfer_size = 3584;
|
||||
|
|
|
@ -2241,6 +2241,9 @@ namespace rsx
|
|||
// Fragment constants may have been updated
|
||||
m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;
|
||||
|
||||
// DMA sync; if you need this, don't use MTRSX
|
||||
// g_dma_manager.sync();
|
||||
|
||||
//TODO: On sync every sub-unit should finish any pending tasks
|
||||
//Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently
|
||||
//verify (HERE), async_tasks_pending.load() == 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue