rsx: Fix offloader deadlock

- Do not allow the offloader to handle its own faults; serialize them on the RSX thread instead.
  This approach introduces a GPU-side race condition that improved synchronization should eventually eliminate.
- TODO: Use proper GPU-side synchronization to avoid this situation
kd-11 2019-08-25 18:47:49 +03:00 committed by kd-11
parent b70908c8f3
commit 9d981de96d
8 changed files with 183 additions and 72 deletions
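In outline: when the offloader (DMA) thread faults on memory that only the RSX thread can flush, it no longer services the fault itself. It records the faulting range, raises a fault flag, and parks; the RSX thread — which may itself be spin-waiting on the offloader inside dma_manager::sync() — keeps pumping do_local_task() during the wait, notices the deadlock bit, flushes on the offloader's behalf, and releases it. Below is a minimal, compilable sketch of that handshake; the std::atomic flags and function names are illustrative stand-ins for dma_manager's fault flag and VKGSRender's flush_queue_state bitmask, not RPCS3's actual API.

```cpp
#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<bool> offloader_fault{false}; // offloader: "I faulted, RSX must fix it"
std::atomic<bool> deadlock_flag{false};   // RSX-side marker that recovery is pending
std::atomic<int>  processed{0};
constexpr int enqueued = 1;

// Offloader thread: it cannot service the fault itself, so it publishes the
// fault and blocks until the RSX thread resolves it.
void offloader_main()
{
    offloader_fault = true;  // set_mem_fault_flag()
    deadlock_flag = true;    // m_queue_status |= flush_queue_state::deadlock

    while (deadlock_flag)    // wait for RSX to run the invalidation for us
        std::this_thread::yield();

    offloader_fault = false; // clear_mem_fault_flag()
    processed = enqueued;    // the queued job can now complete
}

// RSX-side local task pump: this is where the fault is actually serviced.
void do_local_task()
{
    if (deadlock_flag)
    {
        std::puts("RSX: flushing caches on behalf of the offloader");
        deadlock_flag = false;
    }
}

int main()
{
    std::thread offloader(offloader_main);

    // dma_manager::sync() on the RSX thread: without the do_local_task() call
    // inside this loop, both threads would spin forever -- the old deadlock.
    while (processed.load() != enqueued)
    {
        do_local_task(); // on_semaphore_acquire_wait() -> do_local_task(lock_wait)
        std::this_thread::yield();
    }

    offloader.join();
    std::puts("synced without deadlock");
}
```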

rpcs3/Emu/RSX/GL/GLGSRender.cpp

@@ -895,7 +895,6 @@ void GLGSRender::on_init_thread()
     m_video_output_pass.create();
 
     m_gl_texture_cache.initialize();
-    m_thread_id = std::this_thread::get_id();
 
     if (!supports_native_ui)
     {
@@ -1821,7 +1820,7 @@ void GLGSRender::flip(int buffer, bool emu_flip)
 bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 {
-    const bool can_flush = (std::this_thread::get_id() == m_thread_id);
+    const bool can_flush = (std::this_thread::get_id() == m_rsx_thread);
     const rsx::invalidation_cause cause =
         is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
                    : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
@@ -1848,14 +1847,13 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
     return true;
 }
 
-void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
+void GLGSRender::on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause)
 {
-    //Discard all memory in that range without bothering with writeback (Force it for strict?)
     gl::command_context cmd{ gl_state };
-    auto data = std::move(m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::unmap));
+    auto data = std::move(m_gl_texture_cache.invalidate_range(cmd, range, cause));
     AUDIT(data.empty());
-    if (data.violation_handled)
+    if (cause == rsx::invalidation_cause::unmap && data.violation_handled)
     {
         m_gl_texture_cache.purge_unreleased_sections();
         {
@@ -1865,6 +1863,14 @@ void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
     }
 }
 
+void GLGSRender::on_semaphore_acquire_wait()
+{
+    if (!work_queue.empty())
+    {
+        do_local_task(rsx::FIFO_state::lock_wait);
+    }
+}
+
 void GLGSRender::do_local_task(rsx::FIFO_state state)
 {
     if (!work_queue.empty())

rpcs3/Emu/RSX/GL/GLGSRender.h

@@ -327,8 +327,6 @@ private:
     shared_mutex queue_guard;
     std::list<work_item> work_queue;
 
-    std::thread::id m_thread_id;
-
     GLProgramBuffer m_prog_buffer;
     draw_context_t m_decompiler_context;
@@ -397,8 +395,9 @@ protected:
     void do_local_task(rsx::FIFO_state state) override;
 
     bool on_access_violation(u32 address, bool is_writing) override;
-    void on_invalidate_memory_range(const utils::address_range &range) override;
+    void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;
     void notify_tile_unbound(u32 tile) override;
+    void on_semaphore_acquire_wait() override;
 
     std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
     std::array<std::vector<gsl::byte>, 2> copy_depth_stencil_buffer_to_memory() override;

rpcs3/Emu/RSX/RSXOffload.cpp

@@ -3,6 +3,8 @@
 #include "Common/BufferUtils.h"
 #include "Emu/System.h"
 #include "RSXOffload.h"
+#include "RSXThread.h"
+#include "rsx_utils.h"
 
 #include <thread>
 #include <atomic>
@@ -27,6 +29,9 @@ namespace rsx
             return;
         }
 
+        // Register thread id
+        m_thread_id = std::this_thread::get_id();
+
         if (g_cfg.core.thread_scheduler_enabled)
         {
             thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
@@ -36,22 +41,21 @@ namespace rsx
         {
             if (m_enqueued_count.load() != m_processed_count)
             {
-                for (auto slice = m_work_queue.pop_all(); slice; slice.pop_front())
+                for (m_current_job = m_work_queue.pop_all(); m_current_job; m_current_job.pop_front())
                 {
-                    auto task = *slice;
-                    switch (task.type)
+                    switch (m_current_job->type)
                     {
                     case raw_copy:
-                        memcpy(task.dst, task.src, task.length);
+                        memcpy(m_current_job->dst, m_current_job->src, m_current_job->length);
                         break;
                     case vector_copy:
-                        memcpy(task.dst, task.opt_storage.data(), task.length);
+                        memcpy(m_current_job->dst, m_current_job->opt_storage.data(), m_current_job->length);
                         break;
                     case index_emulate:
                         write_index_array_for_non_indexed_non_native_primitive_to_buffer(
-                            reinterpret_cast<char*>(task.dst),
-                            static_cast<rsx::primitive_type>(task.aux_param0),
-                            task.length);
+                            reinterpret_cast<char*>(m_current_job->dst),
+                            static_cast<rsx::primitive_type>(m_current_job->aux_param0),
+                            m_current_job->length);
                         break;
                     default:
                         ASSUME(0);
@@ -116,6 +120,11 @@ namespace rsx
     }
 
     // Synchronization
+    bool dma_manager::is_current_thread() const
+    {
+        return (std::this_thread::get_id() == m_thread_id);
+    }
+
     void dma_manager::sync()
     {
         if (LIKELY(m_enqueued_count.load() == m_processed_count))
@@ -124,8 +133,25 @@ namespace rsx
             return;
         }
 
-        while (m_enqueued_count.load() != m_processed_count)
-            _mm_pause();
+        if (auto rsxthr = get_current_renderer(); rsxthr->is_current_thread())
+        {
+            if (m_mem_fault_flag)
+            {
+                // Abort if offloader is in recovery mode
+                return;
+            }
+
+            while (m_enqueued_count.load() != m_processed_count)
+            {
+                rsxthr->on_semaphore_acquire_wait();
+                _mm_pause();
+            }
+        }
+        else
+        {
+            while (m_enqueued_count.load() != m_processed_count)
+                _mm_pause();
+        }
     }
 
     void dma_manager::join()
@@ -133,4 +159,50 @@ namespace rsx
         m_worker_state = thread_state::finished;
         sync();
     }
+
+    void dma_manager::set_mem_fault_flag()
+    {
+        verify("Access denied" HERE), is_current_thread();
+        m_mem_fault_flag.release(true);
+    }
+
+    void dma_manager::clear_mem_fault_flag()
+    {
+        verify("Access denied" HERE), is_current_thread();
+        m_mem_fault_flag.release(false);
+    }
+
+    // Fault recovery
+    utils::address_range dma_manager::get_fault_range(bool writing) const
+    {
+        verify(HERE), m_current_job;
+
+        void *address = nullptr;
+        u32 range = m_current_job->length;
+
+        switch (m_current_job->type)
+        {
+        case raw_copy:
+            address = (writing) ? m_current_job->dst : m_current_job->src;
+            break;
+        case vector_copy:
+            verify(HERE), writing;
+            address = m_current_job->dst;
+            break;
+        case index_emulate:
+            verify(HERE), writing;
+            address = m_current_job->dst;
+            range = get_index_count(static_cast<rsx::primitive_type>(m_current_job->aux_param0), m_current_job->length);
+            break;
+        default:
+            ASSUME(0);
+            fmt::throw_exception("Unreachable" HERE);
+        }
+
+        const uintptr_t addr = uintptr_t(address);
+        const uintptr_t base = uintptr_t(vm::g_base_addr);
+
+        verify(HERE), addr > base;
+        return utils::address_range::start_length(u32(addr - base), range);
+    }
 }
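A note on get_fault_range(): RPCS3 backs the PS3's 32-bit guest address space with one contiguous host mapping based at vm::g_base_addr, so the faulting host pointer recorded in the current job translates back to a guest address by plain pointer subtraction. A small hedged illustration of that translation follows; the addresses are made up for the example.

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    const std::uint64_t g_base_addr = 0x100000000ull;               // hypothetical host base of guest RAM
    const std::uint64_t fault_ptr   = g_base_addr + 0xC0200000ull;  // hypothetical faulting dst pointer

    // Equivalent of: utils::address_range::start_length(u32(addr - base), range)
    const std::uint32_t guest_start = static_cast<std::uint32_t>(fault_ptr - g_base_addr);
    const std::uint32_t length      = 0x1000; // m_current_job->length

    std::printf("invalidate guest range [0x%08x, 0x%08x)\n", guest_start, guest_start + length);
}
```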

rpcs3/Emu/RSX/RSXOffload.h

@@ -3,9 +3,11 @@
 #include "Utilities/types.h"
 #include "Utilities/lockless.h"
 #include "Utilities/Thread.h"
+#include "Utilities/address_range.h"
 #include "gcm_enums.h"
 
 #include <vector>
+#include <thread>
 
 namespace rsx
 {
@@ -42,9 +44,12 @@ namespace rsx
     };
 
     lf_queue<transport_packet> m_work_queue;
+    lf_queue_slice<transport_packet> m_current_job;
     atomic_t<u64> m_enqueued_count{ 0 };
     volatile u64 m_processed_count = 0;
     thread_state m_worker_state = thread_state::detached;
+    std::thread::id m_thread_id;
+    atomic_t<bool> m_mem_fault_flag{ false };
 
     // TODO: Improved benchmarks here; value determined by profiling on a Ryzen CPU, rounded to the nearest 512 bytes
     const u32 max_immediate_transfer_size = 3584;
@@ -63,8 +68,14 @@ namespace rsx
     void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count);
 
     // Synchronization
+    bool is_current_thread() const;
     void sync();
     void join();
+    void set_mem_fault_flag();
+    void clear_mem_fault_flag();
+
+    // Fault recovery
+    utils::address_range get_fault_range(bool writing) const;
 };
 
 extern dma_manager g_dma_manager;

rpcs3/Emu/RSX/RSXThread.cpp

@@ -917,27 +917,6 @@ namespace rsx
         fmt::throw_exception("ill-formed draw command" HERE);
     }
 
-    void thread::do_internal_task()
-    {
-        if (m_internal_tasks.empty())
-        {
-            std::this_thread::yield();
-        }
-        else
-        {
-            fmt::throw_exception("Disabled" HERE);
-            //std::lock_guard lock(m_mtx_task);
-            //internal_task_entry &front = m_internal_tasks.front();
-
-            //if (front.callback())
-            //{
-            //    front.promise.set_value();
-            //    m_internal_tasks.pop_front();
-            //}
-        }
-    }
-
     void thread::do_local_task(FIFO_state state)
     {
         if (async_flip_requested & flip_request::emu_requested)
@@ -2465,7 +2444,7 @@ namespace rsx
         if (!m_invalidated_memory_range.valid())
             return;
 
-        on_invalidate_memory_range(m_invalidated_memory_range);
+        on_invalidate_memory_range(m_invalidated_memory_range, rsx::invalidation_cause::unmap);
         m_invalidated_memory_range.invalidate();
     }

rpcs3/Emu/RSX/RSXThread.h

@@ -12,6 +12,7 @@
 #include "rsx_methods.h"
 #include "rsx_utils.h"
 #include "Overlays/overlays.h"
+#include "Common/texture_cache_utils.h"
 
 #include "Utilities/Thread.h"
 #include "Utilities/geometry.h"
@@ -418,8 +419,8 @@ namespace rsx
     protected:
         std::thread::id m_rsx_thread;
-        atomic_t<bool> m_rsx_thread_exiting{true};
-        s32 m_return_addr{-1}, restore_ret{-1};
+        atomic_t<bool> m_rsx_thread_exiting{ true };
 
         std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
         std::vector<u32> element_push_buffer;
@@ -433,6 +434,7 @@ namespace rsx
         // FIFO
         std::unique_ptr<FIFO::FIFO_control> fifo_ctrl;
         FIFO::flattening_helper m_flattener;
+        s32 m_return_addr{ -1 }, restore_ret{ -1 };
 
         // Occlusion query
         bool zcull_surface_active = false;
@@ -605,7 +607,7 @@ namespace rsx
         virtual void flip(int buffer, bool emu_flip = false) = 0;
         virtual u64 timestamp();
         virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
-        virtual void on_invalidate_memory_range(const address_range & /*range*/) {}
+        virtual void on_invalidate_memory_range(const address_range & /*range*/, rsx::invalidation_cause) {}
         virtual void notify_tile_unbound(u32 /*tile*/) {}
 
         // zcull
@@ -661,18 +663,6 @@ namespace rsx
     private:
         shared_mutex m_mtx_task;
 
-        struct internal_task_entry
-        {
-            std::function<bool()> callback;
-            //std::promise<void> promise;
-
-            internal_task_entry(std::function<bool()> callback) : callback(std::move(callback))
-            {
-            }
-        };
-
-        std::deque<internal_task_entry> m_internal_tasks;
-        void do_internal_task();
-
         void handle_emu_flip(u32 buffer);
         void handle_invalidated_memory_range();
@@ -732,7 +722,7 @@ namespace rsx
         /**
         * Notify to check internal state during semaphore wait
         */
-        void on_semaphore_acquire_wait() { do_local_task(FIFO_state::lock_wait); }
+        virtual void on_semaphore_acquire_wait() {}
 
         /**
         * Copy rtt values to buffer.
@@ -767,7 +757,10 @@ namespace rsx
         void pause();
         void unpause();
 
-        //Get RSX approximate load in %
+        // Get RSX approximate load in %
        u32 get_load();
+
+        // Returns true if the current thread is the active RSX thread
+        bool is_current_thread() const { return std::this_thread::get_id() == m_rsx_thread; }
     };
 }
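The change to on_semaphore_acquire_wait() above turns a concrete helper (which always forwarded to do_local_task(FIFO_state::lock_wait)) into a virtual no-op hook that each backend overrides. This lets dma_manager::sync() poke whichever renderer is active through the rsx::thread base class without knowing the backend. A reduced sketch of the pattern follows; class and method names mirror the diff, but the bodies are simplified placeholders.

```cpp
#include <cstdio>

struct thread // stand-in for rsx::thread
{
    // Base class exposes a no-op; backends override it to pump their work queues.
    virtual void on_semaphore_acquire_wait() {}
    virtual ~thread() = default;
};

struct VKGSRender_like : thread
{
    bool has_pending_flush = true; // pretend m_flush_requests.pending()

    void on_semaphore_acquire_wait() override
    {
        if (has_pending_flush)
        {
            std::puts("vk backend: servicing flush during wait");
            has_pending_flush = false;
        }
    }
};

// dma_manager::sync() only sees the base class, yet still reaches the backend.
void sync(thread* rsxthr)
{
    rsxthr->on_semaphore_acquire_wait();
}

int main()
{
    VKGSRender_like vk;
    sync(&vk); // prints once
}
```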

rpcs3/Emu/RSX/VK/VKGSRender.cpp

@@ -662,10 +662,29 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
     if (result.num_flushable > 0)
     {
-        const bool is_rsxthr = std::this_thread::get_id() == m_rsx_thread;
-        bool has_queue_ref = false;
+        if (rsx::g_dma_manager.is_current_thread())
+        {
+            // The offloader thread cannot handle flush requests
+            verify(HERE), m_queue_status.load() == flush_queue_state::ok;
+
+            m_offloader_fault_range = rsx::g_dma_manager.get_fault_range(is_writing);
+            m_offloader_fault_cause = (is_writing) ? rsx::invalidation_cause::write : rsx::invalidation_cause::read;
+            rsx::g_dma_manager.set_mem_fault_flag();
+            m_queue_status |= flush_queue_state::deadlock;
+
+            // Wait for deadlock to clear
+            while (m_queue_status & flush_queue_state::deadlock)
+            {
+                _mm_pause();
+            }
+
+            rsx::g_dma_manager.clear_mem_fault_flag();
+            return true;
+        }
 
-        if (!is_rsxthr)
+        bool has_queue_ref = false;
+        if (!is_current_thread())
         {
             //Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
             vm::temporary_unlock();
@@ -703,14 +722,14 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
     return true;
 }
 
-void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
+void VKGSRender::on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause)
 {
     std::lock_guard lock(m_secondary_cb_guard);
-    auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap));
+    auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, cause));
     AUDIT(data.empty());
-    if (data.violation_handled)
+    if (cause == rsx::invalidation_cause::unmap && data.violation_handled)
     {
         m_texture_cache.purge_unreleased_sections();
         {
@@ -720,6 +739,14 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
     }
 }
 
+void VKGSRender::on_semaphore_acquire_wait()
+{
+    if (m_flush_requests.pending() || m_queue_status & flush_queue_state::deadlock)
+    {
+        do_local_task(rsx::FIFO_state::lock_wait);
+    }
+}
+
 void VKGSRender::notify_tile_unbound(u32 tile)
 {
     //TODO: Handle texture writeback
@@ -2326,16 +2353,28 @@ void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources)
 void VKGSRender::do_local_task(rsx::FIFO_state state)
 {
+    if (m_queue_status & flush_queue_state::deadlock)
+    {
+        // Clear offloader deadlock
+        // NOTE: It is not possible to handle regular flush requests before this is cleared
+        // NOTE: This may cause graphics corruption due to unsynchronized modification
+        flush_command_queue();
+        on_invalidate_memory_range(m_offloader_fault_range, m_offloader_fault_cause);
+        m_queue_status.clear(flush_queue_state::deadlock);
+    }
+
     if (m_flush_requests.pending())
     {
-        std::lock_guard lock(m_flush_queue_mutex);
-
-        //TODO: Determine if a hard sync is necessary
-        //Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
-        flush_command_queue();
-
-        m_flush_requests.clear_pending_flag();
-        m_flush_requests.consumer_wait();
+        if (m_flush_queue_mutex.try_lock())
+        {
+            // TODO: Determine if a hard sync is necessary
+            // Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
+            flush_command_queue();
+
+            m_flush_requests.clear_pending_flag();
+            m_flush_requests.consumer_wait();
+
+            m_flush_queue_mutex.unlock();
+        }
     }
     else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
     {
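Two details in do_local_task() are worth spelling out. The deadlock branch must run before the regular flush handling, since the offloader stays parked until it clears. And the flush path switches from std::lock_guard to try_lock: do_local_task() can now be reached re-entrantly (via on_semaphore_acquire_wait() from inside dma_manager::sync()), so blocking on m_flush_queue_mutex could self-deadlock; failing the try_lock simply defers the flush to the next pump. A minimal sketch of the non-blocking pattern, with illustrative names:

```cpp
#include <mutex>
#include <cstdio>

std::mutex flush_queue_mutex;
bool flush_pending = true;

void do_local_task()
{
    if (!flush_pending)
        return;

    if (flush_queue_mutex.try_lock()) // never block: the lock may be busy elsewhere
    {
        std::puts("flushing command queue");
        flush_pending = false;
        flush_queue_mutex.unlock();
    }
    // else: another context owns the flush queue; retry on a later pump
}

int main()
{
    do_local_task(); // acquires the lock and flushes
    do_local_task(); // nothing pending, returns immediately
}
```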

rpcs3/Emu/RSX/VK/VKGSRender.h

@@ -294,6 +294,12 @@ struct flush_request_task
     }
 };
 
+enum flush_queue_state : u32
+{
+    ok = 0,
+    deadlock = 1
+};
+
 class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
 {
 private:
@@ -404,6 +410,11 @@ private:
     shared_mutex m_flush_queue_mutex;
     flush_request_task m_flush_requests;
 
+    // Offloader thread deadlock recovery
+    rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
+    utils::address_range m_offloader_fault_range;
+    rsx::invalidation_cause m_offloader_fault_cause;
+
     bool m_render_pass_open = false;
     u64 m_current_renderpass_key = 0;
     VkRenderPass m_cached_renderpass = VK_NULL_HANDLE;
@@ -488,7 +499,8 @@ protected:
     void notify_tile_unbound(u32 tile) override;
 
     bool on_access_violation(u32 address, bool is_writing) override;
-    void on_invalidate_memory_range(const utils::address_range &range) override;
+    void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;
+    void on_semaphore_acquire_wait() override;
 
     bool on_decompiler_task() override;
 };