rsx: Improve balancing of the offloader thread

- Use two counters (enqueued/processed), each written by only one thread, to avoid contended atomic operations (sketched below)
- Yield instead of sleeping in the idle loop, because some games are very sensitive to timing
kd-11 2019-06-19 22:01:48 +03:00 committed by kd-11
parent 8249d51aa8
commit 4ff77a8555
4 changed files with 29 additions and 13 deletions
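
The split-counter design relies on the queue having exactly one producer (the RSX thread) and one consumer (the offloader): each thread writes only its own counter, so no atomic read-modify-write is ever contended between them, and "work pending" reduces to the two counters disagreeing. A minimal standalone sketch of the idiom, using illustrative names and portable std::atomic rather than RPCS3's actual classes:

#include <atomic>
#include <cstdint>
#include <thread>

// Single-producer/single-consumer balance counters. Each counter has exactly
// one writer, so plain load/store pairs suffice; no fetch_add or CAS needed.
std::atomic<std::uint64_t> enqueued{0};  // written by the producer only
std::atomic<std::uint64_t> processed{0}; // written by the consumer only

void producer_push_one()
{
    // No RMW: this thread is the sole writer of 'enqueued'.
    enqueued.store(enqueued.load(std::memory_order_relaxed) + 1,
                   std::memory_order_release);
}

void consumer_loop(const std::atomic<bool>& finished)
{
    while (!finished.load(std::memory_order_relaxed))
    {
        const std::uint64_t done = processed.load(std::memory_order_relaxed);
        if (enqueued.load(std::memory_order_acquire) != done)
        {
            // ... pop and execute one queued job here ...
            processed.store(done + 1, std::memory_order_release);
        }
        else
        {
            // Yield instead of sleeping: the loop re-checks almost
            // immediately, keeping job pick-up latency low.
            std::this_thread::yield();
        }
    }
}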

rpcs3/Emu/Cell/SPUThread.cpp

@@ -1797,7 +1797,7 @@ bool spu_thread::process_mfc_cmd()
             break;
         }

-        thread_ctrl::wait_for(100);
+        thread_ctrl::wait_for(500);
     }

     if (test_stopped())
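
This tuning accompanies the offloader change: with the offloader now spinning on yield rather than sleeping, the MFC polling interval here is relaxed from 100 to 500 (a timeout passed to thread_ctrl::wait_for, presumably in microseconds), likely so that waiting SPU threads burn less CPU competing with it.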

rpcs3/Emu/RSX/RSXOffload.cpp

@@ -4,12 +4,20 @@
 #include "Emu/System.h"
 #include "RSXOffload.h"

+#include <thread>
+
 namespace rsx
 {
     // initialization
     void dma_manager::init()
     {
         m_worker_state = thread_state::created;
+        m_enqueued_count.store(0);
+        m_processed_count = 0;
+
+        // Empty work queue in case of stale contents
+        m_work_queue.pop_all();
+
         thread_ctrl::spawn("RSX offloader", [this]()
         {
             if (!g_cfg.video.multithreaded_rsx)
@@ -25,7 +33,7 @@ namespace rsx
             while (m_worker_state != thread_state::finished)
             {
-                if (m_jobs_count)
+                if (m_enqueued_count.load() != m_processed_count)
                 {
                     for (auto slice = m_work_queue.pop_all(); slice; slice.pop_front())
                     {
@@ -49,16 +57,17 @@
                             fmt::throw_exception("Unreachable" HERE);
                         }

-                        m_jobs_count--;
+                        ++m_processed_count;
                     }
                 }
                 else
                 {
-                    thread_ctrl::wait_for(500);
+                    // Yield
+                    std::this_thread::yield();
                 }
             }

-            m_jobs_count.store(0);
+            m_processed_count = m_enqueued_count.load();
         });
     }
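
Two details of the worker loop are worth calling out. Swapping thread_ctrl::wait_for(500) for std::this_thread::yield() trades idle CPU time for wake-up latency: a yield returns to polling as soon as the scheduler allows, while the old sleep could delay job pick-up by the full timeout, which per the commit message some games cannot tolerate. And on shutdown, m_processed_count = m_enqueued_count.load() force-balances the counters so that any thread blocked in sync() cannot spin forever on jobs the exiting worker will never run.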
@@ -71,7 +80,7 @@
         }
         else
         {
-            ++m_jobs_count;
+            ++m_enqueued_count;
             m_work_queue.push(dst, src, length);
         }
     }
@@ -84,7 +93,7 @@
         }
         else
         {
-            ++m_jobs_count;
+            ++m_enqueued_count;
             m_work_queue.push(dst, src, length);
         }
     }
@@ -99,7 +108,7 @@
         }
         else
        {
-            ++m_jobs_count;
+            ++m_enqueued_count;
             m_work_queue.push(dst, primitive, count);
         }
     }
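
Note the ordering in all three enqueue paths: the counter is bumped before the packet is pushed. This keeps the invariant that m_enqueued_count never lags the queue's visible contents, so a sync() that observes the counters equal can safely conclude the queue is fully drained. The worst case of the early increment is harmless: the worker may momentarily see the counters differ while the push is still in flight, find pop_all() empty, and simply retry on the next loop iteration.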
@@ -107,11 +116,14 @@
     // Synchronization
     void dma_manager::sync()
     {
-        if (g_cfg.video.multithreaded_rsx)
+        if (LIKELY(m_enqueued_count.load() == m_processed_count))
         {
-            while (m_jobs_count)
-                _mm_lfence();
+            // Nothing to do
+            return;
         }
+
+        while (m_enqueued_count.load() != m_processed_count)
+            _mm_lfence();
     }

     void dma_manager::join()
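
The intended caller pattern is: queue transfers, overlap other CPU work, then sync() before reading anything the offloader writes. A hypothetical usage sketch; g_dma_manager is the global instance referenced in RSXThread.cpp below, and copy()'s name and signature are inferred from the m_work_queue.push(dst, src, length) calls above rather than quoted from the header:

#include "RSXOffload.h"

void upload_and_consume(void* dst, void* src, u32 length)
{
    // Assumed behavior: runs asynchronously on the offloader when MTRSX is
    // enabled, or falls back to an inline copy for small transfers.
    g_dma_manager.copy(dst, src, length);

    // ... other work that does not touch 'dst' ...

    // Blocks until m_processed_count catches up to m_enqueued_count;
    // 'dst' is safe to read afterwards.
    g_dma_manager.sync();
}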

rpcs3/Emu/RSX/RSXOffload.h

@@ -42,8 +42,9 @@ namespace rsx
     };

     lf_queue<transport_packet> m_work_queue;
-    atomic_t<int> m_jobs_count;
-    thread_state m_worker_state;
+    atomic_t<u64> m_enqueued_count{ 0 };
+    volatile u64 m_processed_count = 0;
+    thread_state m_worker_state = thread_state::detached;

     // TODO: Improved benchmarks here; value determined by profiling on a Ryzen CPU, rounded to the nearest 512 bytes
     const u32 max_immediate_transfer_size = 3584;
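
A note on the types chosen here: m_processed_count can be a plain volatile u64 rather than atomic_t because it has a single writer (the offloader); the producer only ever reads it in sync(). On the x86-64 targets involved, aligned 64-bit loads and stores are atomic at the hardware level, so volatile merely stops the compiler from caching the value in a register. In strictly portable C++ this cross-thread read would be a data race; the conservative equivalent is an atomic u64 accessed only with relaxed/acquire loads and release stores, which compiles to the same machine code on x86. Widening from int to u64 also makes counter wrap-around a non-issue, and the in-class initializer on m_worker_state (thread_state::detached) removes a previously uninitialized member.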

rpcs3/Emu/RSX/RSXThread.cpp

@@ -2241,6 +2241,9 @@ namespace rsx
         // Fragment constants may have been updated
         m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;

+        // DMA sync; if you need this, don't use MTRSX
+        // g_dma_manager.sync();
+
         //TODO: On sync every sub-unit should finish any pending tasks
         //Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently
         //verify (HERE), async_tasks_pending.load() == 0;