mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 08:21:29 +12:00
vk: Latch query copy requests to reduce number of vulkan commands used
This commit is contained in:
parent
d08fea551e
commit
06459eb37b
3 changed files with 48 additions and 6 deletions
|
@ -2894,7 +2894,7 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
||||||
if (!query_info.indices.empty())
|
if (!query_info.indices.empty())
|
||||||
{
|
{
|
||||||
const auto& index = query_info.indices.front();
|
const auto& index = query_info.indices.front();
|
||||||
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0);
|
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, 1, m_cond_render_buffer->value, 0);
|
||||||
|
|
||||||
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
|
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage,
|
VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage,
|
||||||
|
@ -2912,16 +2912,58 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
||||||
{
|
{
|
||||||
// We'll need to do some result aggregation using a compute shader.
|
// We'll need to do some result aggregation using a compute shader.
|
||||||
auto scratch = vk::get_scratch_buffer(*m_current_command_buffer, num_hw_queries * 4);
|
auto scratch = vk::get_scratch_buffer(*m_current_command_buffer, num_hw_queries * 4);
|
||||||
|
|
||||||
|
// Range latching. Because of how the query pool manages allocations using a stack, we get an inverse sequential set of handles/indices that we can easily group together.
|
||||||
|
// This drastically boosts performance on some drivers like the NVIDIA proprietary one that seems to have a rather high cost for every individual query transfer command.
|
||||||
|
std::pair<u32, u32> query_range = { umax, 0 };
|
||||||
|
|
||||||
|
auto copy_query_range_impl = [&]()
|
||||||
|
{
|
||||||
|
const auto count = (query_range.second - query_range.first + 1);
|
||||||
|
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, query_range.first, count, scratch->value, dst_offset);
|
||||||
|
dst_offset += count * 4;
|
||||||
|
};
|
||||||
|
|
||||||
for (usz i = first; i < last; ++i)
|
for (usz i = first; i < last; ++i)
|
||||||
{
|
{
|
||||||
auto& query_info = m_occlusion_map[sources[i]->driver_handle];
|
auto& query_info = m_occlusion_map[sources[i]->driver_handle];
|
||||||
for (const auto& index : query_info.indices)
|
for (const auto& index : query_info.indices)
|
||||||
{
|
{
|
||||||
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset);
|
// First iteration?
|
||||||
dst_offset += 4;
|
if (query_range.first == umax)
|
||||||
|
{
|
||||||
|
query_range = { index, index };
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Head?
|
||||||
|
if ((query_range.first - 1) == index)
|
||||||
|
{
|
||||||
|
query_range.first = index;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tail?
|
||||||
|
if ((query_range.second + 1) == index)
|
||||||
|
{
|
||||||
|
query_range.second = index;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush pending queue. In practice, this is never reached and we fall out to the spill block outside the loops
|
||||||
|
copy_query_range_impl();
|
||||||
|
|
||||||
|
// Start a new range for the current index
|
||||||
|
query_range = { index, index };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (query_range.first != umax)
|
||||||
|
{
|
||||||
|
// Dangling queries, flush
|
||||||
|
copy_query_range_impl();
|
||||||
|
}
|
||||||
|
|
||||||
// Sanity check
|
// Sanity check
|
||||||
ensure(dst_offset <= scratch->size());
|
ensure(dst_offset <= scratch->size());
|
||||||
|
|
||||||
|
|
|
@ -168,11 +168,11 @@ namespace vk
|
||||||
return query_info.data;
|
return query_info.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_pool_manager::get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset)
|
void query_pool_manager::get_query_result_indirect(vk::command_buffer& cmd, u32 index, u32 count, VkBuffer dst, VkDeviceSize dst_offset)
|
||||||
{
|
{
|
||||||
// We're technically supposed to stop any active renderpasses before streaming the results out, but that doesn't matter on IMR hw
|
// We're technically supposed to stop any active renderpasses before streaming the results out, but that doesn't matter on IMR hw
|
||||||
// On TBDR setups like the apple M series, the stop is required (results are all 0 if you don't flush the RP), but this introduces a very heavy performance loss.
|
// On TBDR setups like the apple M series, the stop is required (results are all 0 if you don't flush the RP), but this introduces a very heavy performance loss.
|
||||||
vkCmdCopyQueryPoolResults(cmd, *query_slot_status[index].pool, index, 1, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT);
|
vkCmdCopyQueryPoolResults(cmd, *query_slot_status[index].pool, index, count, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_pool_manager::free_query(vk::command_buffer&/*cmd*/, u32 index)
|
void query_pool_manager::free_query(vk::command_buffer&/*cmd*/, u32 index)
|
||||||
|
|
|
@ -47,7 +47,7 @@ namespace vk
|
||||||
|
|
||||||
bool check_query_status(u32 index);
|
bool check_query_status(u32 index);
|
||||||
u32 get_query_result(u32 index);
|
u32 get_query_result(u32 index);
|
||||||
void get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset);
|
void get_query_result_indirect(vk::command_buffer& cmd, u32 index, u32 count, VkBuffer dst, VkDeviceSize dst_offset);
|
||||||
|
|
||||||
u32 allocate_query(vk::command_buffer& cmd);
|
u32 allocate_query(vk::command_buffer& cmd);
|
||||||
void free_query(vk::command_buffer&/*cmd*/, u32 index);
|
void free_query(vk::command_buffer&/*cmd*/, u32 index);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue