rsx: Improve profiling setup

- Avoid spamming QPC when not needed
- Free performance when debug overlay is not enabled
This commit is contained in:
kd-11 2019-06-18 21:31:35 +03:00 committed by kd-11
parent b893a75002
commit d26b25816d
6 changed files with 58 additions and 51 deletions

View file

@ -183,7 +183,7 @@ void GLGSRender::begin()
void GLGSRender::end() void GLGSRender::end()
{ {
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now(); m_profiler.start();
if (skip_frame || !framebuffer_status_valid || if (skip_frame || !framebuffer_status_valid ||
(conditional_render_enabled && conditional_render_test_failed)) (conditional_render_enabled && conditional_render_test_failed))
@ -193,8 +193,7 @@ void GLGSRender::end()
return; return;
} }
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now(); m_begin_time += m_profiler.duration();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
const auto do_heap_cleanup = [this]() const auto do_heap_cleanup = [this]()
{ {
@ -233,7 +232,7 @@ void GLGSRender::end()
// Load textures // Load textures
{ {
std::chrono::time_point<steady_clock> textures_start = steady_clock::now(); m_profiler.start();
std::lock_guard lock(m_sampler_mutex); std::lock_guard lock(m_sampler_mutex);
bool update_framebuffer_sourced = false; bool update_framebuffer_sourced = false;
@ -296,12 +295,9 @@ void GLGSRender::end()
m_samplers_dirty.store(false); m_samplers_dirty.store(false);
std::chrono::time_point<steady_clock> textures_end = steady_clock::now(); m_textures_upload_time += m_profiler.duration();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
} }
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible // NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip // TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
if (!load_program()) if (!load_program())
@ -317,12 +313,9 @@ void GLGSRender::end()
// Load program execution environment // Load program execution environment
load_program_env(); load_program_env();
std::chrono::time_point<steady_clock> program_stop = steady_clock::now(); m_begin_time += m_profiler.duration();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
//Bind textures and resolve external copy operations //Bind textures and resolve external copy operations
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{ {
if (current_fp_metadata.referenced_textures_mask & (1 << i)) if (current_fp_metadata.referenced_textures_mask & (1 << i))
@ -394,10 +387,7 @@ void GLGSRender::end()
} }
} }
std::chrono::time_point<steady_clock> textures_end = steady_clock::now(); m_textures_upload_time += m_profiler.duration();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
std::chrono::time_point<steady_clock> draw_start = textures_end;
// Optionally do memory synchronization if the texture stage has not yet triggered this // Optionally do memory synchronization if the texture stage has not yet triggered this
if (true)//g_cfg.video.strict_rendering_mode) if (true)//g_cfg.video.strict_rendering_mode)
@ -647,8 +637,7 @@ void GLGSRender::end()
m_fragment_constants_buffer->notify(); m_fragment_constants_buffer->notify();
m_transform_constants_buffer->notify(); m_transform_constants_buffer->notify();
std::chrono::time_point<steady_clock> draw_end = steady_clock::now(); m_draw_time += m_profiler.duration();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
rsx::thread::end(); rsx::thread::end();
} }
@ -1456,7 +1445,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
void GLGSRender::update_draw_state() void GLGSRender::update_draw_state()
{ {
std::chrono::time_point<steady_clock> then = steady_clock::now(); m_profiler.start();
bool color_mask_b = rsx::method_registers.color_mask_b(); bool color_mask_b = rsx::method_registers.color_mask_b();
bool color_mask_g = rsx::method_registers.color_mask_g(); bool color_mask_g = rsx::method_registers.color_mask_g();
@ -1573,8 +1562,7 @@ void GLGSRender::update_draw_state()
//NV4097_SET_ANTI_ALIASING_CONTROL //NV4097_SET_ANTI_ALIASING_CONTROL
//NV4097_SET_CLIP_ID_TEST_ENABLE //NV4097_SET_CLIP_ID_TEST_ENABLE
std::chrono::time_point<steady_clock> now = steady_clock::now(); m_begin_time += m_profiler.duration();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
} }
void GLGSRender::flip(int buffer, bool emu_flip) void GLGSRender::flip(int buffer, bool emu_flip)

View file

@ -152,7 +152,7 @@ namespace
gl::vertex_upload_info GLGSRender::set_vertex_buffer() gl::vertex_upload_info GLGSRender::set_vertex_buffer()
{ {
std::chrono::time_point<steady_clock> then = steady_clock::now(); m_profiler.start();
//Write index buffers and count verts //Write index buffers and count verts
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers)); auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
@ -254,7 +254,6 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
//Write all the data //Write all the data
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first); write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
std::chrono::time_point<steady_clock> now = steady_clock::now(); m_vertex_upload_time += m_profiler.duration();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
return upload_info; return upload_info;
} }

View file

@ -436,8 +436,8 @@ namespace rsx
on_init_thread(); on_init_thread();
method_registers.init(); method_registers.init();
g_dma_manager.init(); g_dma_manager.init();
m_profiler.enabled = !!g_cfg.video.overlay;
if (!zcull_ctrl) if (!zcull_ctrl)
{ {

View file

@ -446,6 +446,9 @@ namespace rsx
// Draw call stats // Draw call stats
u32 m_draw_calls = 0; u32 m_draw_calls = 0;
// Profiler
rsx::profiling_timer m_profiler;
public: public:
RsxDmaControl* ctrl = nullptr; RsxDmaControl* ctrl = nullptr;
u32 restore_point = 0; u32 restore_point = 0;

View file

@ -829,7 +829,7 @@ void VKGSRender::check_heap_status(u32 flags)
if (heap_critical) if (heap_critical)
{ {
std::chrono::time_point<steady_clock> submit_start = steady_clock::now(); m_profiler.start();
frame_context_t *target_frame = nullptr; frame_context_t *target_frame = nullptr;
if (!m_queued_frames.empty()) if (!m_queued_frames.empty())
@ -863,8 +863,7 @@ void VKGSRender::check_heap_status(u32 flags)
frame_context_cleanup(target_frame, true); frame_context_cleanup(target_frame, true);
} }
std::chrono::time_point<steady_clock> submit_end = steady_clock::now(); m_flip_time += m_profiler.duration();
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
} }
} }
@ -954,7 +953,7 @@ void VKGSRender::begin()
void VKGSRender::update_draw_state() void VKGSRender::update_draw_state()
{ {
std::chrono::time_point<steady_clock> start = steady_clock::now(); m_profiler.start();
float actual_line_width = rsx::method_registers.line_width(); float actual_line_width = rsx::method_registers.line_width();
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width); vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
@ -1010,8 +1009,7 @@ void VKGSRender::update_draw_state()
//TODO: Set up other render-state parameters into the program pipeline //TODO: Set up other render-state parameters into the program pipeline
std::chrono::time_point<steady_clock> stop = steady_clock::now(); m_setup_time += m_profiler.duration();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
} }
void VKGSRender::begin_render_pass() void VKGSRender::begin_render_pass()
@ -1046,7 +1044,7 @@ void VKGSRender::close_render_pass()
void VKGSRender::emit_geometry(u32 sub_index) void VKGSRender::emit_geometry(u32 sub_index)
{ {
auto &draw_call = rsx::method_registers.current_draw_clause; auto &draw_call = rsx::method_registers.current_draw_clause;
//std::chrono::time_point<steady_clock> vertex_start = steady_clock::now(); m_profiler.start();
if (sub_index == 0) if (sub_index == 0)
{ {
@ -1087,8 +1085,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
return; return;
} }
//std::chrono::time_point<steady_clock> vertex_end = steady_clock::now(); m_vertex_upload_time += m_profiler.duration();
//m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
@ -1145,8 +1142,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
// Bind the new set of descriptors for use with this draw call // Bind the new set of descriptors for use with this draw call
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
//std::chrono::time_point<steady_clock> draw_start = steady_clock::now(); m_setup_time += m_profiler.duration();
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();
if (!upload_info.index_info) if (!upload_info.index_info)
{ {
@ -1190,8 +1186,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
} }
} }
//std::chrono::time_point<steady_clock> draw_end = steady_clock::now(); m_draw_time += m_profiler.duration();
//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
} }
void VKGSRender::end() void VKGSRender::end()
@ -1204,7 +1199,7 @@ void VKGSRender::end()
return; return;
} }
std::chrono::time_point<steady_clock> textures_start = steady_clock::now(); m_profiler.start();
// Check for data casts // Check for data casts
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better // NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
@ -1448,10 +1443,8 @@ void VKGSRender::end()
} }
} }
std::chrono::time_point<steady_clock> textures_end = steady_clock::now(); m_textures_upload_time += m_profiler.duration();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
std::chrono::time_point<steady_clock> program_start = textures_end;
if (!load_program()) if (!load_program())
{ {
// Program is not ready, skip drawing this // Program is not ready, skip drawing this
@ -1469,10 +1462,7 @@ void VKGSRender::end()
// Load program execution environment // Load program execution environment
load_program_env(); load_program_env();
std::chrono::time_point<steady_clock> program_end = steady_clock::now(); m_setup_time += m_profiler.duration();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
textures_start = program_end;
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{ {
@ -1675,8 +1665,7 @@ void VKGSRender::end()
} }
} }
textures_end = steady_clock::now(); m_textures_upload_time += m_profiler.duration();
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
u32 occlusion_id = 0; u32 occlusion_id = 0;
if (m_occlusion_query_active) if (m_occlusion_query_active)
@ -3128,7 +3117,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
reinitialize_swapchain(); reinitialize_swapchain();
} }
std::chrono::time_point<steady_clock> flip_start = steady_clock::now(); m_profiler.start();
if (m_current_frame == &m_aux_frame_context) if (m_current_frame == &m_aux_frame_context)
{ {
@ -3441,8 +3430,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
queue_swap_request(); queue_swap_request();
std::chrono::time_point<steady_clock> flip_end = steady_clock::now(); m_flip_time = m_profiler.duration();
m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count();
//NOTE:Resource destruction is handled within the real swap handler //NOTE:Resource destruction is handled within the real swap handler

View file

@ -8,6 +8,7 @@
#include <memory> #include <memory>
#include <bitset> #include <bitset>
#include <chrono>
extern "C" extern "C"
{ {
@ -1082,4 +1083,32 @@ namespace rsx
return _data ? _data + _size : nullptr; return _data ? _data + _size : nullptr;
} }
}; };
struct profiling_timer
{
bool enabled = false;
std::chrono::time_point<steady_clock> last;
profiling_timer() = default;
void start()
{
if (UNLIKELY(enabled))
{
last = steady_clock::now();
}
}
s64 duration()
{
if (LIKELY(!enabled))
{
return 0ll;
}
auto old = last;
last = steady_clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(last - old).count();
}
};
} }