mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-08 16:01:42 +12:00
vk: Add the async task scheduler
This commit is contained in:
parent
cd6ef2958b
commit
77e312fb99
13 changed files with 292 additions and 25 deletions
|
@ -462,6 +462,7 @@ if(TARGET 3rdparty_vulkan)
|
|||
RSX/VK/vkutils/device.cpp
|
||||
RSX/VK/vkutils/sampler.cpp
|
||||
RSX/VK/vkutils/shared.cpp
|
||||
RSX/VK/VKAsyncScheduler.cpp
|
||||
RSX/VK/VKCommandStream.cpp
|
||||
RSX/VK/VKCommonDecompiler.cpp
|
||||
RSX/VK/VKCompute.cpp
|
||||
|
|
161
rpcs3/Emu/RSX/VK/VKAsyncScheduler.cpp
Normal file
161
rpcs3/Emu/RSX/VK/VKAsyncScheduler.cpp
Normal file
|
@ -0,0 +1,161 @@
|
|||
#include "VKAsyncScheduler.h"
|
||||
#include "VKHelpers.h"
|
||||
#include "VKResourceManager.h"
|
||||
|
||||
#include "Emu/IdManager.h"
|
||||
#include "Utilities/lockless.h"
|
||||
#include "Utilities/mutex.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace vk
|
||||
{
|
||||
// Thread entry point for the async scheduler worker (run via named_thread).
// Bridges GPU-side queue1 events to queue2 events: once the transfer queue has
// reached a sync point (queue1_signal set on the GPU timeline), the host
// signals queue2_signal so consumers on the primary queue can proceed.
void AsyncTaskScheduler::operator()()
{
	// Keep the object alive while the worker is running; paired with release() below.
	add_ref();

	while (thread_ctrl::state() != thread_state::aborting)
	{
		// Drain all queued cross-queue events.
		// NOTE(review): pop_all() on an empty queue makes this a busy spin —
		// consider a blocking wait/yield on m_event_queue if lf_queue supports it.
		for (auto&& job : m_event_queue.pop_all())
		{
			// Block until the GPU has set the first event, then propagate it.
			vk::wait_for_event(job->queue1_signal.get(), GENERAL_WAIT_TIMEOUT);
			job->queue2_signal->host_signal();
		}
	}

	release();
}
|
||||
|
||||
void AsyncTaskScheduler::delayed_init()
|
||||
{
|
||||
auto pdev = get_current_renderer();
|
||||
m_command_pool.create(*const_cast<render_device*>(pdev), pdev->get_transfer_queue_family());
|
||||
|
||||
for (usz i = 0; i < events_pool_size; ++i)
|
||||
{
|
||||
auto ev1 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
|
||||
auto ev2 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
|
||||
m_events_pool.emplace_back(std::move(ev1), std::move(ev2), 0ull);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates the next slot from the circular event pool, arms both events and
// queues the pair for the worker thread. Caller must hold m_submit_mutex and
// have an open command buffer (m_current_cb).
void AsyncTaskScheduler::insert_sync_event()
{
	ensure(m_current_cb);

	xqueue_event* sync_label;
	ensure(m_next_event_id < events_pool_size);
	sync_label = &m_events_pool[m_next_event_id];

	if (++m_next_event_id == events_pool_size)
	{
		// Wrap
		m_next_event_id = 0;
	}

	// The pool is a ring: before reusing a slot, verify the previous use of
	// this slot has fully retired on the GPU timeline. If this fires, the
	// 16384-entry pool was exhausted faster than events complete.
	ensure(sync_label->completion_eid <= vk::last_completed_event_id());

	// Re-arm both events and stamp the slot with the current global event id.
	sync_label->queue1_signal->reset();
	sync_label->queue2_signal->reset();
	sync_label->completion_eid = vk::current_event_id();

	// GPU signals queue1 when the transfer-queue work reaches this point;
	// the worker thread then host-signals queue2 (see operator()()).
	sync_label->queue1_signal->signal(*m_current_cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0);

	m_event_queue.push(sync_label);
	// Publish queue2 as the label the primary queue should wait on.
	m_sync_label = sync_label->queue2_signal.get();
}
|
||||
|
||||
// Shuts down the worker thread and destroys all Vulkan resources.
// Assumes the device is idle enough that destroying command buffers is safe.
AsyncTaskScheduler::~AsyncTaskScheduler()
{
	// Ask the named_thread wrapper (which owns this object via g_fxo) to stop,
	// then spin until the worker drops its add_ref() from operator()().
	// NOTE(review): _mm_pause is x86-specific — confirm portability policy.
	*g_fxo->get<async_scheduler_thread>() = thread_state::aborting;
	while (has_refs()) _mm_pause();

	for (auto& cb : m_async_command_queue)
	{
		cb.destroy();
	}

	m_async_command_queue.clear();
	m_next_cb_index = 0;
	m_command_pool.destroy();
	// Events are unique_ptr-owned; clearing the pool releases them.
	m_events_pool.clear();
}
|
||||
|
||||
// Returns the command buffer to record async transfer work into, creating or
// recycling one from the ring of up to VK_MAX_ASYNC_COMPUTE_QUEUES buffers.
// Marks a sync event as required so the next get_primary_sync_label()/flush()
// inserts a cross-queue sync point. Thread-safe via m_submit_mutex.
command_buffer* AsyncTaskScheduler::get_current()
{
	std::lock_guard lock(m_submit_mutex);
	m_sync_required = true;

	// 0. Anything still active?
	if (m_current_cb)
	{
		return m_current_cb;
	}

	// 1. Check if there is a 'next' entry
	// (first call performs the deferred pool/event initialization)
	if (m_async_command_queue.empty())
	{
		delayed_init();
	}
	else if (m_next_cb_index < m_async_command_queue.size())
	{
		m_current_cb = &m_async_command_queue[m_next_cb_index];
	}

	// 2. Create entry
	if (!m_current_cb)
	{
		if (m_next_cb_index == VK_MAX_ASYNC_COMPUTE_QUEUES)
		{
			// Ring is full — wrap around and reuse the oldest buffer.
			// NOTE(review): no check that the reused buffer has finished
			// executing on the GPU before it is handed back out — confirm
			// callers guarantee this.
			m_next_cb_index = 0;
			m_current_cb = &m_async_command_queue[m_next_cb_index];
		}
		else
		{
			m_async_command_queue.push_back({});
			m_current_cb = &m_async_command_queue.back();
			m_current_cb->create(m_command_pool, true);
		}
	}

	m_next_cb_index++;
	return m_current_cb;
}
|
||||
|
||||
// Hands the primary queue the event it must wait on before consuming results
// of the async transfer queue. Returns nullptr when no wait is pending; the
// stored label is consumed (cleared) by this call.
event* AsyncTaskScheduler::get_primary_sync_label()
{
	std::lock_guard lock(m_submit_mutex);

	// Fast path: no new work was recorded since the last sync point.
	if (!m_sync_required)
	{
		return std::exchange(m_sync_label, nullptr);
	}

	// Work was recorded on the async queue — emit a fresh sync event first.
	ensure(m_current_cb);
	insert_sync_event();
	m_sync_required = false;

	return std::exchange(m_sync_label, nullptr);
}
|
||||
|
||||
// Closes and submits the currently-open async command buffer to the transfer
// queue, inserting a pending sync event first if one is required. No-op when
// nothing has been recorded. Thread-safe via m_submit_mutex.
void AsyncTaskScheduler::flush(VkSemaphore wait_semaphore, VkPipelineStageFlags wait_dst_stage_mask)
{
	std::lock_guard lock(m_submit_mutex);

	// Nothing recorded since the last flush.
	if (!m_current_cb)
	{
		return;
	}

	// Emit the deferred cross-queue sync point before closing the buffer.
	if (m_sync_required)
	{
		insert_sync_event();
	}

	auto* submit_cb = m_current_cb;
	submit_cb->end();
	submit_cb->submit(get_current_renderer()->get_transfer_queue(), wait_semaphore, VK_NULL_HANDLE, nullptr, wait_dst_stage_mask, VK_FALSE);

	// Retire the buffer; the next get_current() picks a fresh one.
	m_last_used_cb = submit_cb;
	m_current_cb = nullptr;
	m_sync_required = false;
}
|
||||
}
|
60
rpcs3/Emu/RSX/VK/VKAsyncScheduler.h
Normal file
60
rpcs3/Emu/RSX/VK/VKAsyncScheduler.h
Normal file
|
@ -0,0 +1,60 @@
|
|||
#pragma once
|
||||
|
||||
#include "vkutils/commands.h"
|
||||
#include "vkutils/sync.h"
|
||||
|
||||
#include "Utilities/Thread.h"
|
||||
|
||||
#define VK_MAX_ASYNC_COMPUTE_QUEUES 256
|
||||
|
||||
namespace vk
|
||||
{
|
||||
struct xqueue_event
|
||||
{
|
||||
std::unique_ptr<event> queue1_signal;
|
||||
std::unique_ptr<event> queue2_signal;
|
||||
u64 completion_eid;
|
||||
};
|
||||
|
||||
// Schedules transfer work onto a dedicated Vulkan transfer queue and provides
// cross-queue synchronization back to the primary graphics queue. The object
// is also a thread payload (see operator()) run via named_thread; ref_counted
// keeps it alive while that worker drains the event queue.
class AsyncTaskScheduler : private rsx::ref_counted
{
	// Vulkan resources
	std::vector<command_buffer> m_async_command_queue;   // ring of transfer-queue command buffers
	command_pool m_command_pool;                         // pool on the transfer queue family

	// Running state
	command_buffer* m_last_used_cb = nullptr;            // most recently submitted buffer
	command_buffer* m_current_cb = nullptr;              // buffer currently open for recording
	usz m_next_cb_index = 0;                             // next ring slot to hand out

	// Sync
	event* m_sync_label = nullptr;                       // label the primary queue should wait on
	bool m_sync_required = false;                        // set when work was recorded since last sync

	// Fixed-size ring of reusable event pairs; slots are recycled once their
	// completion_eid has retired (checked in insert_sync_event).
	static constexpr u32 events_pool_size = 16384;
	std::vector<xqueue_event> m_events_pool;
	atomic_t<u32> m_next_event_id = 0;

	lf_queue<xqueue_event*> m_event_queue;               // pending events for the worker thread
	shared_mutex m_submit_mutex;                         // guards all scheduling state above

	void delayed_init();
	void insert_sync_event();

public:
	AsyncTaskScheduler() = default;
	~AsyncTaskScheduler();

	// Returns the command buffer to record async work into (lazily created).
	command_buffer* get_current();
	// Consumes and returns the event the primary queue must wait on, or nullptr.
	event* get_primary_sync_label();

	// Ends and submits the open command buffer to the transfer queue.
	void flush(VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);

	// Thread entry-point
	void operator()();

	static constexpr auto thread_name = "Vulkan Async Scheduler"sv;
};
|
||||
|
||||
using async_scheduler_thread = named_thread<AsyncTaskScheduler>;
|
||||
}
|
|
@ -1,13 +1,15 @@
|
|||
#include "stdafx.h"
|
||||
#include "../Overlays/overlay_shader_compile_notification.h"
|
||||
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
|
||||
#include "VKGSRender.h"
|
||||
#include "VKHelpers.h"
|
||||
|
||||
#include "VKAsyncScheduler.h"
|
||||
#include "VKCommandStream.h"
|
||||
#include "VKCommonDecompiler.h"
|
||||
#include "VKCompute.h"
|
||||
#include "VKGSRender.h"
|
||||
#include "VKHelpers.h"
|
||||
#include "VKRenderPass.h"
|
||||
#include "VKResourceManager.h"
|
||||
#include "VKCommandStream.h"
|
||||
|
||||
#include "vkutils/buffer_object.h"
|
||||
#include "vkutils/scratch.h"
|
||||
|
@ -501,6 +503,8 @@ VKGSRender::VKGSRender() : GSRender()
|
|||
|
||||
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.91");
|
||||
|
||||
g_fxo->init<vk::async_scheduler_thread>();
|
||||
|
||||
open_command_buffer();
|
||||
|
||||
for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
|
||||
|
@ -1931,6 +1935,9 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
|
|||
const bool sync_success = g_fxo->get<rsx::dma_manager>().sync();
|
||||
const VkBool32 force_flush = !sync_success;
|
||||
|
||||
// Flush any asynchronously scheduled jobs
|
||||
g_fxo->get<vk::async_scheduler_thread>()->flush();
|
||||
|
||||
if (vk::test_status_interrupt(vk::heap_dirty))
|
||||
{
|
||||
if (m_attrib_ring_info.is_dirty() ||
|
||||
|
|
|
@ -68,7 +68,7 @@ namespace vk
|
|||
{
|
||||
upload_contents_async = 1,
|
||||
initialize_image_layout = 2,
|
||||
preserve_image_layout = 3,
|
||||
preserve_image_layout = 4,
|
||||
|
||||
// meta-flags
|
||||
upload_contents_inline = 0,
|
||||
|
|
|
@ -10,6 +10,7 @@ namespace vk
|
|||
|
||||
resource_manager g_resource_manager;
|
||||
atomic_t<u64> g_event_ctr;
|
||||
atomic_t<u64> g_last_completed_event;
|
||||
|
||||
constexpr u64 s_vmm_warn_threshold_size = 2000 * 0x100000; // Warn if allocation on a single heap exceeds this value
|
||||
|
||||
|
@ -28,6 +29,11 @@ namespace vk
|
|||
return g_event_ctr.load();
|
||||
}
|
||||
|
||||
u64 last_completed_event_id()
|
||||
{
|
||||
return g_last_completed_event.load();
|
||||
}
|
||||
|
||||
void on_event_completed(u64 event_id, bool flush)
|
||||
{
|
||||
if (!flush && g_cfg.video.multithreaded_rsx)
|
||||
|
@ -40,6 +46,7 @@ namespace vk
|
|||
}
|
||||
|
||||
g_resource_manager.eid_completed(event_id);
|
||||
g_last_completed_event = std::max(event_id, g_last_completed_event.load());
|
||||
}
|
||||
|
||||
static constexpr f32 size_in_GiB(u64 size)
|
||||
|
|
|
@ -11,6 +11,7 @@ namespace vk
|
|||
{
|
||||
u64 get_event_id();
|
||||
u64 current_event_id();
|
||||
u64 last_completed_event_id();
|
||||
void on_event_completed(u64 event_id, bool flush = false);
|
||||
|
||||
struct eid_scope_t
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
#include "stdafx.h"
|
||||
#include "VKHelpers.h"
|
||||
#include "VKFormats.h"
|
||||
#include "VKAsyncScheduler.h"
|
||||
#include "VKCompute.h"
|
||||
#include "VKDMA.h"
|
||||
#include "VKHelpers.h"
|
||||
#include "VKFormats.h"
|
||||
#include "VKRenderPass.h"
|
||||
#include "VKRenderTargets.h"
|
||||
|
||||
|
@ -800,12 +801,11 @@ namespace vk
|
|||
static const vk::command_buffer& prepare_for_transfer(const vk::command_buffer& primary_cb, vk::image* dst_image, rsx::flags32_t& flags)
|
||||
{
|
||||
const vk::command_buffer* pcmd = nullptr;
|
||||
#if 0
|
||||
if (flags & image_upload_options::upload_contents_async)
|
||||
{
|
||||
auto cb = vk::async_transfer_get_current();
|
||||
cb->begin();
|
||||
pcmd = cb;
|
||||
auto async_cmd = g_fxo->get<vk::async_scheduler_thread>()->get_current();
|
||||
async_cmd->begin();
|
||||
pcmd = async_cmd;
|
||||
|
||||
if (!(flags & image_upload_options::preserve_image_layout))
|
||||
{
|
||||
|
@ -813,7 +813,6 @@ namespace vk
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (vk::is_renderpass_open(primary_cb))
|
||||
{
|
||||
|
|
|
@ -137,7 +137,7 @@ namespace vk
|
|||
src->pop_layout(cmd);
|
||||
|
||||
// Create event object for this transfer and queue signal op
|
||||
dma_fence = std::make_unique<vk::event>(*m_device);
|
||||
dma_fence = std::make_unique<vk::event>(*m_device, sync_domain::any);
|
||||
dma_fence->signal(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
// Set cb flag for queued dma operations
|
||||
|
|
|
@ -58,10 +58,10 @@ namespace vk
|
|||
return (handle != VK_NULL_HANDLE);
|
||||
}
|
||||
|
||||
event::event(const render_device& dev)
|
||||
event::event(const render_device& dev, sync_domain domain)
|
||||
{
|
||||
m_device = dev;
|
||||
if (dev.gpu().get_driver_vendor() != driver_vendor::AMD)
|
||||
if (domain == sync_domain::gpu || dev.gpu().get_driver_vendor() != driver_vendor::AMD)
|
||||
{
|
||||
VkEventCreateInfo info
|
||||
{
|
||||
|
@ -75,14 +75,14 @@ namespace vk
|
|||
{
|
||||
// Work around AMD's broken event signals
|
||||
m_buffer = std::make_unique<buffer>
|
||||
(
|
||||
dev,
|
||||
4,
|
||||
dev.get_memory_mapping().host_visible_coherent,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
0
|
||||
);
|
||||
(
|
||||
dev,
|
||||
4,
|
||||
dev.get_memory_mapping().host_visible_coherent,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
0
|
||||
);
|
||||
|
||||
m_value = reinterpret_cast<u32*>(m_buffer->map(0, 4));
|
||||
*m_value = 0xCAFEBABE;
|
||||
|
@ -116,6 +116,24 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
// Sets the event from the host (CPU) side via vkSetEvent.
// Only valid for events backed by a real VkEvent — the host-visible buffer
// fallback (used on AMD for sync_domain::any) cannot be signalled this way,
// hence the hard check.
void event::host_signal() const
{
	ensure(m_vk_event);
	vkSetEvent(m_device, m_vk_event);
}
|
||||
|
||||
void event::reset() const
|
||||
{
|
||||
if (m_vk_event) [[likely]]
|
||||
{
|
||||
vkResetEvent(m_device, m_vk_event);
|
||||
}
|
||||
else
|
||||
{
|
||||
*m_value = 0xCAFEBABE;
|
||||
}
|
||||
}
|
||||
|
||||
VkResult event::status() const
|
||||
{
|
||||
if (m_vk_event) [[likely]]
|
||||
|
|
|
@ -10,6 +10,12 @@ namespace vk
|
|||
{
|
||||
class command_buffer;
|
||||
|
||||
enum class sync_domain
|
||||
{
|
||||
any = 0,
|
||||
gpu = 1
|
||||
};
|
||||
|
||||
struct fence
|
||||
{
|
||||
atomic_t<bool> flushed = false;
|
||||
|
@ -35,10 +41,13 @@ namespace vk
|
|||
volatile u32* m_value = nullptr;
|
||||
|
||||
public:
|
||||
event(const render_device& dev);
|
||||
event(const render_device& dev, sync_domain domain);
|
||||
~event();
|
||||
|
||||
void signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access);
|
||||
void host_signal() const;
|
||||
VkResult status() const;
|
||||
void reset() const;
|
||||
};
|
||||
|
||||
VkResult wait_for_fence(fence* pFence, u64 timeout = 0ull);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Emu\RSX\VK\VKAsyncScheduler.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKCommandStream.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKCompute.h" />
|
||||
|
@ -64,6 +65,7 @@
|
|||
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Emu\RSX\VK\VKAsyncScheduler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCommonDecompiler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCompute.cpp" />
|
||||
|
|
|
@ -61,9 +61,10 @@
|
|||
</ClCompile>
|
||||
<ClCompile Include="Emu\RSX\VK\vkutils\image_helpers.cpp">
|
||||
<Filter>vkutils</Filter>
|
||||
</ClCompile>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Emu\RSX\VK\VKOverlays.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCompute.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCompute.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKAsyncScheduler.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
|
||||
|
@ -149,6 +150,7 @@
|
|||
<ClInclude Include="Emu\RSX\VK\vkutils\image_helpers.h">
|
||||
<Filter>vkutils</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\VK\VKAsyncScheduler.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="vkutils">
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue