rsx: Improve balancing of the offloader thread

- Use two counters to avoid atomic operations
- Yield instead of sleeping because some games are very sensitive to timing
This commit is contained in:
kd-11 2019-06-19 22:01:48 +03:00 committed by kd-11
parent 8249d51aa8
commit 4ff77a8555
4 changed files with 29 additions and 13 deletions

View file

@ -1797,7 +1797,7 @@ bool spu_thread::process_mfc_cmd()
break;
}
thread_ctrl::wait_for(100);
thread_ctrl::wait_for(500);
}
if (test_stopped())

View file

@ -4,12 +4,20 @@
#include "Emu/System.h"
#include "RSXOffload.h"
#include <thread>
namespace rsx
{
// initialization
void dma_manager::init()
{
m_worker_state = thread_state::created;
m_enqueued_count.store(0);
m_processed_count = 0;
// Empty work queue in case of stale contents
m_work_queue.pop_all();
thread_ctrl::spawn("RSX offloader", [this]()
{
if (!g_cfg.video.multithreaded_rsx)
@ -25,7 +33,7 @@ namespace rsx
while (m_worker_state != thread_state::finished)
{
if (m_jobs_count)
if (m_enqueued_count.load() != m_processed_count)
{
for (auto slice = m_work_queue.pop_all(); slice; slice.pop_front())
{
@ -49,16 +57,17 @@ namespace rsx
fmt::throw_exception("Unreachable" HERE);
}
m_jobs_count--;
++m_processed_count;
}
}
else
{
thread_ctrl::wait_for(500);
// Yield
std::this_thread::yield();
}
}
m_jobs_count.store(0);
m_processed_count = m_enqueued_count.load();
});
}
@ -71,7 +80,7 @@ namespace rsx
}
else
{
++m_jobs_count;
++m_enqueued_count;
m_work_queue.push(dst, src, length);
}
}
@ -84,7 +93,7 @@ namespace rsx
}
else
{
++m_jobs_count;
++m_enqueued_count;
m_work_queue.push(dst, src, length);
}
}
@ -99,7 +108,7 @@ namespace rsx
}
else
{
++m_jobs_count;
++m_enqueued_count;
m_work_queue.push(dst, primitive, count);
}
}
@ -107,11 +116,14 @@ namespace rsx
// Synchronization
void dma_manager::sync()
{
if (g_cfg.video.multithreaded_rsx)
if (LIKELY(m_enqueued_count.load() == m_processed_count))
{
while (m_jobs_count)
_mm_lfence();
// Nothing to do
return;
}
while (m_enqueued_count.load() != m_processed_count)
_mm_lfence();
}
void dma_manager::join()

View file

@ -42,8 +42,9 @@ namespace rsx
};
lf_queue<transport_packet> m_work_queue;
atomic_t<int> m_jobs_count;
thread_state m_worker_state;
atomic_t<u64> m_enqueued_count{ 0 };
volatile u64 m_processed_count = 0;
thread_state m_worker_state = thread_state::detached;
// TODO: Improve benchmarks here; value was determined by profiling on a Ryzen CPU and rounded to the nearest 512 bytes
const u32 max_immediate_transfer_size = 3584;

View file

@ -2241,6 +2241,9 @@ namespace rsx
// Fragment constants may have been updated
m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;
// DMA sync; if you need this, don't use MTRSX
// g_dma_manager.sync();
//TODO: On sync every sub-unit should finish any pending tasks
//Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently
//verify (HERE), async_tasks_pending.load() == 0;