vk: Optimize occlusion pool management

- Do not consume a slot on every draw call; instead, batch as many draws as possible
- Since renderpasses are dispatched per-draw-clause, keeping occlusion queries outside the renderpasses works fine
- If renderpasses are reorganized, occlusion tasks will have to be reorganized again
This commit is contained in:
kd-11 2019-06-19 00:26:25 +03:00 committed by kd-11
parent 1ee675e1f4
commit 8249d51aa8
3 changed files with 62 additions and 50 deletions

View file

@ -473,7 +473,9 @@ VKGSRender::VKGSRender() : GSRender()
//Occlusion //Occlusion
m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE); m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE);
for (int n = 0; n < 128; ++n) m_occlusion_map.resize(occlusion_query_count);
for (int n = 0; n < occlusion_query_count; ++n)
m_occlusion_query_data[n].driver_handle = n; m_occlusion_query_data[n].driver_handle = n;
//Generate frame contexts //Generate frame contexts
@ -1667,10 +1669,9 @@ void VKGSRender::end()
m_textures_upload_time += m_profiler.duration(); m_textures_upload_time += m_profiler.duration();
u32 occlusion_id = 0; if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task)
if (m_occlusion_query_active)
{ {
occlusion_id = m_occlusion_query_pool.find_free_slot(); u32 occlusion_id = m_occlusion_query_pool.find_free_slot();
if (occlusion_id == UINT32_MAX) if (occlusion_id == UINT32_MAX)
{ {
m_tsc += 100; m_tsc += 100;
@ -1683,21 +1684,21 @@ void VKGSRender::end()
if (m_current_task) m_current_task->result = 1; if (m_current_task) m_current_task->result = 1;
} }
} }
// Begin query
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
data.indices.push_back(occlusion_id);
data.command_buffer_to_wait = m_current_command_buffer;
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
} }
bool primitive_emulated = false; bool primitive_emulated = false;
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{
//Begin query
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
}
// Apply write memory barriers // Apply write memory barriers
if (true)//g_cfg.video.strict_rendering_mode) if (true)//g_cfg.video.strict_rendering_mode)
{ {
@ -1768,12 +1769,6 @@ void VKGSRender::end()
close_render_pass(); close_render_pass();
vk::leave_uninterruptible(); vk::leave_uninterruptible();
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{
//End query
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
}
m_rtts.on_write(); m_rtts.on_write();
rsx::thread::end(); rsx::thread::end();
@ -2126,7 +2121,6 @@ void VKGSRender::clear_surface(u32 mask)
void VKGSRender::flush_command_queue(bool hard_sync) void VKGSRender::flush_command_queue(bool hard_sync)
{ {
rsx::g_dma_manager.sync();
close_and_submit_command_buffer(m_current_command_buffer->submit_fence); close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
if (hard_sync) if (hard_sync)
@ -2165,6 +2159,11 @@ void VKGSRender::flush_command_queue(bool hard_sync)
check_present_status(); check_present_status();
} }
if (m_occlusion_query_active)
{
m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
}
open_command_buffer(); open_command_buffer();
} }
@ -2781,6 +2780,9 @@ void VKGSRender::write_buffers()
void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags) void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags)
{ {
// Wait before sync block below
rsx::g_dma_manager.sync();
if (m_attrib_ring_info.dirty() || if (m_attrib_ring_info.dirty() ||
m_fragment_env_ring_info.dirty() || m_fragment_env_ring_info.dirty() ||
m_vertex_env_ring_info.dirty() || m_vertex_env_ring_info.dirty() ||
@ -2810,6 +2812,13 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
} }
// End open queries. Flags will be automatically reset by the submit routine
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
{
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
}
m_current_command_buffer->end(); m_current_command_buffer->end();
m_current_command_buffer->tag(); m_current_command_buffer->tag();
@ -3473,18 +3482,33 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query) void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{ {
verify(HERE), !m_occlusion_query_active;
query->result = 0; query->result = 0;
//query->sync_timestamp = get_system_time(); //query->sync_timestamp = get_system_time();
m_active_query_info = query; m_active_query_info = query;
m_occlusion_query_active = true; m_occlusion_query_active = true;
m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
} }
void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query) void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
{ {
m_occlusion_query_active = false; verify(HERE), query == m_active_query_info;
m_active_query_info = nullptr;
// NOTE: flushing the queue is very expensive, do not flush just because query stopped // NOTE: flushing the queue is very expensive, do not flush just because query stopped
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
{
// End query
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
}
// Clear occlusion load flag
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
m_occlusion_query_active = false;
m_active_query_info = nullptr;
} }
bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query) bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
@ -3492,11 +3516,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
if (!query->num_draws) if (!query->num_draws)
return true; return true;
auto found = m_occlusion_map.find(query->driver_handle); auto &data = m_occlusion_map[query->driver_handle];
if (found == m_occlusion_map.end())
return true;
auto &data = found->second;
if (data.indices.empty()) if (data.indices.empty())
return true; return true;
@ -3522,11 +3542,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query) void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
{ {
auto found = m_occlusion_map.find(query->driver_handle); auto &data = m_occlusion_map[query->driver_handle];
if (found == m_occlusion_map.end())
return;
auto &data = found->second;
if (data.indices.empty()) if (data.indices.empty())
return; return;
@ -3561,27 +3577,22 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
} }
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
m_occlusion_map.erase(query->driver_handle); data.indices.clear();
} }
void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query) void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
{ {
if (m_active_query_info == query) if (m_active_query_info == query)
{ {
m_occlusion_query_active = false; end_occlusion_query(query);
m_active_query_info = nullptr;
} }
auto found = m_occlusion_map.find(query->driver_handle); auto &data = m_occlusion_map[query->driver_handle];
if (found == m_occlusion_map.end())
return;
auto &data = found->second;
if (data.indices.empty()) if (data.indices.empty())
return; return;
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
m_occlusion_map.erase(query->driver_handle); data.indices.clear();
} }
bool VKGSRender::on_decompiler_task() bool VKGSRender::on_decompiler_task()

View file

@ -400,7 +400,7 @@ private:
vk::occlusion_query_pool m_occlusion_query_pool; vk::occlusion_query_pool m_occlusion_query_pool;
bool m_occlusion_query_active = false; bool m_occlusion_query_active = false;
rsx::reports::occlusion_query_info *m_active_query_info = nullptr; rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
std::unordered_map<u32, occlusion_data> m_occlusion_map; std::vector<occlusion_data> m_occlusion_map;
shared_mutex m_secondary_cb_guard; shared_mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool; vk::command_pool m_secondary_command_buffer_pool;

View file

@ -891,7 +891,9 @@ private:
{ {
cb_has_occlusion_task = 1, cb_has_occlusion_task = 1,
cb_has_blit_transfer = 2, cb_has_blit_transfer = 2,
cb_has_dma_transfer = 4 cb_has_dma_transfer = 4,
cb_has_open_query = 8,
cb_load_occluson_task = 16
}; };
u32 flags = 0; u32 flags = 0;
@ -2725,7 +2727,7 @@ public:
VkQueryPool query_pool = VK_NULL_HANDLE; VkQueryPool query_pool = VK_NULL_HANDLE;
vk::render_device* owner = nullptr; vk::render_device* owner = nullptr;
std::deque<u32> available_slots; std::stack<u32> available_slots;
std::vector<bool> query_active_status; std::vector<bool> query_active_status;
public: public:
@ -2740,11 +2742,10 @@ public:
owner = &dev; owner = &dev;
query_active_status.resize(num_entries, false); query_active_status.resize(num_entries, false);
available_slots.resize(num_entries);
for (u32 n = 0; n < num_entries; ++n) for (u32 n = 0; n < num_entries; ++n)
{ {
available_slots[n] = n; available_slots.push(n);
} }
} }
@ -2807,7 +2808,7 @@ public:
vkCmdResetQueryPool(cmd, query_pool, index, 1); vkCmdResetQueryPool(cmd, query_pool, index, 1);
query_active_status[index] = false; query_active_status[index] = false;
available_slots.push_back(index); available_slots.push(index);
} }
} }
@ -2834,8 +2835,8 @@ public:
return ~0u; return ~0u;
} }
u32 result = available_slots.front(); u32 result = available_slots.top();
available_slots.pop_front(); available_slots.pop();
verify(HERE), !query_active_status[result]; verify(HERE), !query_active_status[result];
return result; return result;