rsx: Improve profiling setup

- Avoid calling QueryPerformanceCounter (QPC) when timing data is not needed
- Gain free performance by skipping profiling when the debug overlay is not enabled
This commit is contained in:
kd-11 2019-06-18 21:31:35 +03:00 committed by kd-11
parent b893a75002
commit d26b25816d
6 changed files with 58 additions and 51 deletions

View file

@ -183,7 +183,7 @@ void GLGSRender::begin()
void GLGSRender::end()
{
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
m_profiler.start();
if (skip_frame || !framebuffer_status_valid ||
(conditional_render_enabled && conditional_render_test_failed))
@ -193,8 +193,7 @@ void GLGSRender::end()
return;
}
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
m_begin_time += m_profiler.duration();
const auto do_heap_cleanup = [this]()
{
@ -233,7 +232,7 @@ void GLGSRender::end()
// Load textures
{
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
m_profiler.start();
std::lock_guard lock(m_sampler_mutex);
bool update_framebuffer_sourced = false;
@ -296,12 +295,9 @@ void GLGSRender::end()
m_samplers_dirty.store(false);
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
m_textures_upload_time += m_profiler.duration();
}
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
if (!load_program())
@ -317,12 +313,9 @@ void GLGSRender::end()
// Load program execution environment
load_program_env();
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
m_begin_time += m_profiler.duration();
//Bind textures and resolve external copy operations
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
if (current_fp_metadata.referenced_textures_mask & (1 << i))
@ -394,10 +387,7 @@ void GLGSRender::end()
}
}
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
std::chrono::time_point<steady_clock> draw_start = textures_end;
m_textures_upload_time += m_profiler.duration();
// Optionally do memory synchronization if the texture stage has not yet triggered this
if (true)//g_cfg.video.strict_rendering_mode)
@ -647,8 +637,7 @@ void GLGSRender::end()
m_fragment_constants_buffer->notify();
m_transform_constants_buffer->notify();
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
m_draw_time += m_profiler.duration();
rsx::thread::end();
}
@ -1456,7 +1445,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
void GLGSRender::update_draw_state()
{
std::chrono::time_point<steady_clock> then = steady_clock::now();
m_profiler.start();
bool color_mask_b = rsx::method_registers.color_mask_b();
bool color_mask_g = rsx::method_registers.color_mask_g();
@ -1573,8 +1562,7 @@ void GLGSRender::update_draw_state()
//NV4097_SET_ANTI_ALIASING_CONTROL
//NV4097_SET_CLIP_ID_TEST_ENABLE
std::chrono::time_point<steady_clock> now = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_begin_time += m_profiler.duration();
}
void GLGSRender::flip(int buffer, bool emu_flip)

View file

@ -152,7 +152,7 @@ namespace
gl::vertex_upload_info GLGSRender::set_vertex_buffer()
{
std::chrono::time_point<steady_clock> then = steady_clock::now();
m_profiler.start();
//Write index buffers and count verts
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
@ -254,7 +254,6 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
//Write all the data
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
std::chrono::time_point<steady_clock> now = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_vertex_upload_time += m_profiler.duration();
return upload_info;
}

View file

@ -436,8 +436,8 @@ namespace rsx
on_init_thread();
method_registers.init();
g_dma_manager.init();
m_profiler.enabled = !!g_cfg.video.overlay;
if (!zcull_ctrl)
{

View file

@ -446,6 +446,9 @@ namespace rsx
// Draw call stats
u32 m_draw_calls = 0;
// Profiler
rsx::profiling_timer m_profiler;
public:
RsxDmaControl* ctrl = nullptr;
u32 restore_point = 0;

View file

@ -829,7 +829,7 @@ void VKGSRender::check_heap_status(u32 flags)
if (heap_critical)
{
std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
m_profiler.start();
frame_context_t *target_frame = nullptr;
if (!m_queued_frames.empty())
@ -863,8 +863,7 @@ void VKGSRender::check_heap_status(u32 flags)
frame_context_cleanup(target_frame, true);
}
std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
m_flip_time += m_profiler.duration();
}
}
@ -954,7 +953,7 @@ void VKGSRender::begin()
void VKGSRender::update_draw_state()
{
std::chrono::time_point<steady_clock> start = steady_clock::now();
m_profiler.start();
float actual_line_width = rsx::method_registers.line_width();
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
@ -1010,8 +1009,7 @@ void VKGSRender::update_draw_state()
//TODO: Set up other render-state parameters into the program pipeline
std::chrono::time_point<steady_clock> stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
m_setup_time += m_profiler.duration();
}
void VKGSRender::begin_render_pass()
@ -1046,7 +1044,7 @@ void VKGSRender::close_render_pass()
void VKGSRender::emit_geometry(u32 sub_index)
{
auto &draw_call = rsx::method_registers.current_draw_clause;
//std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
m_profiler.start();
if (sub_index == 0)
{
@ -1087,8 +1085,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
return;
}
//std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
//m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
m_vertex_upload_time += m_profiler.duration();
auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
@ -1145,8 +1142,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
// Bind the new set of descriptors for use with this draw call
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
//std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();
m_setup_time += m_profiler.duration();
if (!upload_info.index_info)
{
@ -1190,8 +1186,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
}
}
//std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
m_draw_time += m_profiler.duration();
}
void VKGSRender::end()
@ -1204,7 +1199,7 @@ void VKGSRender::end()
return;
}
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
m_profiler.start();
// Check for data casts
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
@ -1448,10 +1443,8 @@ void VKGSRender::end()
}
}
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
m_textures_upload_time += m_profiler.duration();
std::chrono::time_point<steady_clock> program_start = textures_end;
if (!load_program())
{
// Program is not ready, skip drawing this
@ -1469,10 +1462,7 @@ void VKGSRender::end()
// Load program execution environment
load_program_env();
std::chrono::time_point<steady_clock> program_end = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
textures_start = program_end;
m_setup_time += m_profiler.duration();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
@ -1675,8 +1665,7 @@ void VKGSRender::end()
}
}
textures_end = steady_clock::now();
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
m_textures_upload_time += m_profiler.duration();
u32 occlusion_id = 0;
if (m_occlusion_query_active)
@ -3128,7 +3117,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
reinitialize_swapchain();
}
std::chrono::time_point<steady_clock> flip_start = steady_clock::now();
m_profiler.start();
if (m_current_frame == &m_aux_frame_context)
{
@ -3441,8 +3430,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
queue_swap_request();
std::chrono::time_point<steady_clock> flip_end = steady_clock::now();
m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count();
m_flip_time = m_profiler.duration();
//NOTE:Resource destruction is handled within the real swap handler

View file

@ -8,6 +8,7 @@
#include <memory>
#include <bitset>
#include <chrono>
extern "C"
{
@ -1082,4 +1083,32 @@ namespace rsx
return _data ? _data + _size : nullptr;
}
};
struct profiling_timer
{
bool enabled = false;
std::chrono::time_point<steady_clock> last;
profiling_timer() = default;
void start()
{
if (UNLIKELY(enabled))
{
last = steady_clock::now();
}
}
s64 duration()
{
if (LIKELY(!enabled))
{
return 0ll;
}
auto old = last;
last = steady_clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(last - old).count();
}
};
}