mirror of https://github.com/RPCS3/rpcs3.git
vk/rsx: Tuning and optimization for host labels
parent 24587ab459
commit da559b5568

3 changed files with 90 additions and 29 deletions
@@ -616,14 +616,21 @@ VKGSRender::VKGSRender() : GSRender()
 	if (backend_config.supports_host_gpu_labels)
 	{
-		m_host_object_data = std::make_unique<vk::buffer>(*m_device,
-			0x100000,
-			memory_map.device_bar, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
-			VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0,
-			VMM_ALLOCATION_POOL_SYSTEM);
+		if (backend_config.supports_passthrough_dma)
+		{
+			m_host_object_data = std::make_unique<vk::buffer>(*m_device,
+				0x10000,
+				memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+				VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0,
+				VMM_ALLOCATION_POOL_SYSTEM);
 
-		m_host_data_ptr = new (m_host_object_data->map(0, 0x100000)) vk::host_data_t();
-		ensure(m_host_data_ptr->magic == 0xCAFEBABE);
+			m_host_data_ptr = new (m_host_object_data->map(0, 0x100000)) vk::host_data_t();
+			ensure(m_host_data_ptr->magic == 0xCAFEBABE);
+		}
+		else
+		{
+			rsx_log.error("Your GPU/driver does not support extensions required to enable passthrough DMA emulation. Host GPU labels will be disabled.");
+		}
 	}
 }
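Note on the hunk above: vk::host_data_t default-initializes its magic field, so the placement-new into the mapped allocation doubles as a readback check that the mapping is actually writable and visible to the host. A minimal standalone sketch of that pattern, with plain malloc memory standing in for the persistently mapped Vulkan buffer (the trimmed-down host_data_t and its field types are assumptions for illustration):

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <new>

// Trimmed-down stand-in for vk::host_data_t; field types are assumptions.
struct host_data_t
{
    std::uint64_t magic = 0xCAFEBABE;           // written by the constructor on placement-new
    std::uint64_t last_label_release_event = 0;
    std::uint64_t commands_complete_event = 0;
};

int main()
{
    // Stand-in for m_host_object_data->map(0, size): any host-visible allocation.
    void* mapped = std::malloc(sizeof(host_data_t));
    assert(mapped);

    // Construct the shared header in place; the GPU later overwrites individual
    // fields (e.g. commands_complete_event) via vkCmdUpdateBuffer.
    auto* host_data = new (mapped) host_data_t();

    // Readback check: if the mapping is valid, the magic is visible, which is
    // exactly what ensure(m_host_data_ptr->magic == 0xCAFEBABE) verifies.
    assert(host_data->magic == 0xCAFEBABE);

    std::free(mapped);
}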
@@ -1493,20 +1500,67 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
 	{
 		// All texture loads already seen by the host GPU
 		// Wait for all previously submitted labels to be flushed
-		while (m_host_data_ptr->last_label_release_event > m_host_data_ptr->commands_complete_event)
+		if (m_host_data_ptr->last_label_release_event > m_host_data_ptr->commands_complete_event)
 		{
-			_mm_pause();
+			//const u64 wait_start = utils::get_tsc();
+
+			while (m_host_data_ptr->last_label_release_event > m_host_data_ptr->commands_complete_event)
+			{
+				_mm_pause();
+
+				if (thread_ctrl::state() == thread_state::aborting)
+				{
+					break;
+				}
+			}
+
+			//const u64 now = utils::get_tsc();
+			//const u64 divisor = utils::get_tsc_freq() / 1000000;
+			//const u64 full_duration = (now - m_host_data_ptr->last_label_request_timestamp) / divisor;
+			//const u64 wait_duration = (now - wait_start) / divisor;
+			//rsx_log.error("GPU sync took [%llu, %llu] microseconds to complete", full_duration, wait_duration);
 		}
 
 		return false;
 	}
 
-	m_host_data_ptr->last_label_release_event = ++m_host_data_ptr->event_counter;
-
 	const auto mapping = vk::map_dma(address, 4);
 	const auto write_data = std::bit_cast<u32, be_t<u32>>(args);
-	vkCmdUpdateBuffer(*m_current_command_buffer, mapping.second->value, mapping.first, 4, &write_data);
-	flush_command_queue();
+
+	if (!dynamic_cast<vk::memory_block_host*>(mapping.second->memory.get()))
+	{
+		// NVIDIA GPUs can disappoint when DMA blocks straddle VirtualAlloc boundaries.
+		// Take the L and try the fallback.
+		rsx_log.warning("Host label update at 0x%x was not possible.", address);
+		return false;
+	}
+
+	m_host_data_ptr->last_label_release_event = ++m_host_data_ptr->event_counter;
+	//m_host_data_ptr->last_label_request_timestamp = utils::get_tsc();
+
+	if (m_host_data_ptr->texture_load_request_event > m_host_data_ptr->last_label_submit_event)
+	{
+		if (vk::is_renderpass_open(*m_current_command_buffer))
+		{
+			vk::end_renderpass(*m_current_command_buffer);
+		}
+
+		vkCmdUpdateBuffer(*m_current_command_buffer, mapping.second->value, mapping.first, 4, &write_data);
+		flush_command_queue();
+	}
+	else
+	{
+		auto cmd = m_secondary_cb_list.next();
+		cmd->begin();
+		vkCmdUpdateBuffer(*cmd, mapping.second->value, mapping.first, 4, &write_data);
+		vkCmdUpdateBuffer(*cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::commands_complete_event), 8, const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
+		cmd->end();
+
+		vk::queue_submit_t submit_info = { m_device->get_graphics_queue(), nullptr };
+		cmd->submit(submit_info);
+
+		m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
+	}
 
 	return true;
 }
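The control flow above is a monotonic event-counter handshake: the CPU bumps event_counter when it releases a label, and the GPU writes the released value back into commands_complete_event at the end of a submitted batch, so a waiter only spins while releases have outrun completions. A host-only sketch of the handshake, with a worker thread standing in for the GPU write-back and std::atomic standing in for the coherent mapped buffer (both are assumptions for the sketch):

#include <atomic>
#include <cstdint>
#include <thread>

// Names mirror vk::host_data_t; std::atomic stands in for the coherent
// mapped buffer that CPU and GPU share in the real code.
struct host_data_t
{
    std::atomic<std::uint64_t> event_counter{0};
    std::atomic<std::uint64_t> last_label_release_event{0};
    std::atomic<std::uint64_t> commands_complete_event{0};
};

int main()
{
    host_data_t host_data;

    // "CPU": release a label by taking the next value of the shared counter.
    const std::uint64_t released = ++host_data.event_counter;
    host_data.last_label_release_event = released;

    // "GPU": at the end of the submitted batch, write the released value back,
    // mirroring vkCmdUpdateBuffer(..., offset of commands_complete_event, ...).
    std::thread gpu([&] { host_data.commands_complete_event = released; });

    // "CPU": spin only while releases have outrun completions; the real loop
    // uses _mm_pause() and also bails out when the thread is aborting.
    while (host_data.last_label_release_event > host_data.commands_complete_event)
    {
        std::this_thread::yield();
    }

    gpu.join();
}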
@@ -2145,13 +2199,15 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
 		m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
 	}
 
-	if (m_host_data_ptr && m_host_data_ptr->last_label_release_event > m_host_data_ptr->commands_complete_event)
+	if (m_host_data_ptr && m_host_data_ptr->last_label_release_event > m_host_data_ptr->last_label_submit_event)
 	{
 		vkCmdUpdateBuffer(*m_current_command_buffer,
 			m_host_object_data->value,
 			::offset32(&vk::host_data_t::commands_complete_event),
 			sizeof(u64),
-			const_cast<u64*>(&m_host_data_ptr->event_counter));
+			const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
+
+		m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
 	}
 
 	m_current_command_buffer->end();
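The predicate change in this hunk is the core fix: comparing against commands_complete_event could re-queue a completion write that was already in flight, and writing event_counter could mark labels complete that were never actually released. Tracking last_label_submit_event keeps the stages ordered. A small sketch of the invariant, with names mirroring vk::host_data_t (the needs_writeback helper is hypothetical):

#include <cassert>
#include <cstdint>

// Counter stages, names mirroring vk::host_data_t. Values only increase, and
// each stage trails the one before it:
//   commands_complete_event <= last_label_submit_event
//       <= last_label_release_event <= event_counter
struct host_data_t
{
    std::uint64_t event_counter = 0;            // monotonic source of label ids
    std::uint64_t last_label_release_event = 0; // CPU has released up to here
    std::uint64_t last_label_submit_event = 0;  // completion write queued up to here
    std::uint64_t commands_complete_event = 0;  // GPU has confirmed up to here
};

// Hypothetical helper: a completion write is pending only when releases have
// outrun submits. The old predicate (release > complete) could re-queue a
// write that was already in flight on the GPU timeline.
bool needs_writeback(const host_data_t& d)
{
    return d.last_label_release_event > d.last_label_submit_event;
}

int main()
{
    host_data_t d;
    d.event_counter = 5;
    d.last_label_release_event = 5; // five labels released
    d.last_label_submit_event = 3;  // completion write queued for three of them
    d.commands_complete_event = 3;  // ...and confirmed by the GPU

    assert(needs_writeback(d));

    // close_and_submit_command_buffer() queues the write and records the fact:
    d.last_label_submit_event = d.last_label_release_event;
    assert(!needs_writeback(d));
}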
@@ -23,7 +23,9 @@ namespace vk
 		u64 texture_load_request_event = 0;
 		u64 texture_load_complete_event = 0;
 		u64 last_label_release_event = 0;
+		u64 last_label_submit_event = 0;
 		u64 commands_complete_event = 0;
+		u64 last_label_request_timestamp = 0;
 	};
 
 	struct fence
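::offset32 (an RPCS3 utility) resolves a member pointer to the byte offset that vkCmdUpdateBuffer takes as dstOffset, so the GPU-side completion writes land exactly on commands_complete_event inside the mapped struct. A sketch using offsetof as a stand-in, which gives the same answer for a standard-layout struct (the fields above the shown context, magic and event_counter, are assumed from the code that uses them):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Assumed layout: magic and event_counter are inferred from the code that
// uses them; the remaining fields and their order come from the hunk above.
struct host_data_t
{
    std::uint64_t magic = 0xCAFEBABE;
    std::uint64_t event_counter = 0;
    std::uint64_t texture_load_request_event = 0;
    std::uint64_t texture_load_complete_event = 0;
    std::uint64_t last_label_release_event = 0;
    std::uint64_t last_label_submit_event = 0;      // added by this commit
    std::uint64_t commands_complete_event = 0;
    std::uint64_t last_label_request_timestamp = 0; // added by this commit
};

int main()
{
    // The GPU-side completion write targets exactly this byte offset, which is
    // what ::offset32(&vk::host_data_t::commands_complete_event) supplies as
    // the dstOffset argument of vkCmdUpdateBuffer.
    std::printf("commands_complete_event at byte offset %zu\n",
        offsetof(host_data_t, commands_complete_event));
}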
@@ -35,24 +35,27 @@ namespace rsx
 		const bool is_flip_sema = (address == (rsx->label_addr + 0x10) || address == (rsx->label_addr + 0x30));
 		if (!is_flip_sema)
 		{
-			if constexpr (FlushPipe)
-			{
-				// Ignoring these can cause very poor performance due to timestamp queries taking too long.
-				rsx->sync();
-			}
-
-			if (rsx->get_backend_config().supports_host_gpu_labels &&
-				rsx->release_GCM_label(address, data))
-			{
-				// Backend will handle it, nothing to do.
-				// Implicitly handles DMA sync.
-				return;
-			}
+			// First, queue the GPU work. If it flushes the queue for us, the following routines will be faster.
+			const bool handled = rsx->get_backend_config().supports_host_gpu_labels && rsx->release_GCM_label(address, data);
 
 			if constexpr (FlushDMA)
 			{
+				// If the backend handled the request, this call will basically be a NOP
 				g_fxo->get<rsx::dma_manager>().sync();
 			}
+
+			if constexpr (FlushPipe)
+			{
+				// Manually flush the pipeline.
+				// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
+				rsx->sync();
+			}
+
+			if (handled)
+			{
+				// Backend will handle it, nothing to write.
+				return;
+			}
 		}
 
 		vm::_ref<RsxSemaphore>(address).val = data;
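The rework above changes the order of operations: queue the backend label write first, then let the compile-time FlushDMA/FlushPipe flags decide which syncs still need to run, and only fall back to a plain memory store when the backend did not take the write. A stripped-down sketch of that dispatch (all surrounding calls are stand-ins, with puts/printf in place of the real sync and store routines; only the structure mirrors the diff):

#include <cstdint>
#include <cstdio>

// Compile-time flush-flag dispatch in the style of write_gcm_label.
template <bool FlushDMA, bool FlushPipe>
void write_label(std::uint32_t address, std::uint32_t data, bool backend_takes_label)
{
    // Queue the GPU work first. If it flushes the queue for us, the
    // following routines will be faster.
    const bool handled = backend_takes_label; // stands in for release_GCM_label(...)

    if constexpr (FlushDMA)
    {
        // Near-NOP if the backend already handled the request.
        std::puts("dma_manager::sync()");
    }

    if constexpr (FlushPipe)
    {
        // Full pipeline flush; streaming report writes through the host GPU
        // would generate too much submit traffic.
        std::puts("rsx->sync()");
    }

    if (handled)
    {
        return; // the label write rides the GPU timeline; nothing to store
    }

    std::printf("store 0x%08x -> 0x%08x\n", data, address); // CPU fallback
}

int main()
{
    write_label<true, true>(0x40000010, 1, false);  // both flushes requested
    write_label<false, false>(0x40000010, 1, true); // backend takes the label
}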
@@ -255,7 +258,7 @@ namespace rsx
 		}
 		else
 		{
-			write_gcm_label<true, false>(rsx, get_address(offset, method_registers.semaphore_context_dma_4097()), arg);
+			write_gcm_label<false, false>(rsx, get_address(offset, method_registers.semaphore_context_dma_4097()), arg);
 		}
 	}
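With release_GCM_label now queuing the label write itself, the semaphore release via context DMA 4097 no longer requests a DMA flush at the call site, hence the <true, false> to <false, false> change (assuming the parameter order write_gcm_label<FlushDMA, FlushPipe>, which matches the if-constexpr order in the previous hunk).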