diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index e91f71dab6..727279fcad 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -183,7 +183,7 @@ void GLGSRender::begin() void GLGSRender::end() { - std::chrono::time_point state_check_start = steady_clock::now(); + m_profiler.start(); if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed)) @@ -193,8 +193,7 @@ void GLGSRender::end() return; } - std::chrono::time_point state_check_end = steady_clock::now(); - m_begin_time += (u32)std::chrono::duration_cast(state_check_end - state_check_start).count(); + m_begin_time += m_profiler.duration(); const auto do_heap_cleanup = [this]() { @@ -233,7 +232,7 @@ void GLGSRender::end() // Load textures { - std::chrono::time_point textures_start = steady_clock::now(); + m_profiler.start(); std::lock_guard lock(m_sampler_mutex); bool update_framebuffer_sourced = false; @@ -296,12 +295,9 @@ void GLGSRender::end() m_samplers_dirty.store(false); - std::chrono::time_point textures_end = steady_clock::now(); - m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); + m_textures_upload_time += m_profiler.duration(); } - std::chrono::time_point program_start = steady_clock::now(); - // NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible // TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip if (!load_program()) @@ -317,12 +313,9 @@ void GLGSRender::end() // Load program execution environment load_program_env(); - std::chrono::time_point program_stop = steady_clock::now(); - m_begin_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); + m_begin_time += m_profiler.duration(); //Bind textures and resolve external copy operations - std::chrono::time_point textures_start = steady_clock::now(); - for 
(int i = 0; i < rsx::limits::fragment_textures_count; ++i) { if (current_fp_metadata.referenced_textures_mask & (1 << i)) @@ -394,10 +387,7 @@ void GLGSRender::end() } } - std::chrono::time_point textures_end = steady_clock::now(); - m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); - - std::chrono::time_point draw_start = textures_end; + m_textures_upload_time += m_profiler.duration(); // Optionally do memory synchronization if the texture stage has not yet triggered this if (true)//g_cfg.video.strict_rendering_mode) @@ -647,8 +637,7 @@ void GLGSRender::end() m_fragment_constants_buffer->notify(); m_transform_constants_buffer->notify(); - std::chrono::time_point draw_end = steady_clock::now(); - m_draw_time += (u32)std::chrono::duration_cast(draw_end - draw_start).count(); + m_draw_time += m_profiler.duration(); rsx::thread::end(); } @@ -1456,7 +1445,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info) void GLGSRender::update_draw_state() { - std::chrono::time_point then = steady_clock::now(); + m_profiler.start(); bool color_mask_b = rsx::method_registers.color_mask_b(); bool color_mask_g = rsx::method_registers.color_mask_g(); @@ -1573,8 +1562,7 @@ void GLGSRender::update_draw_state() //NV4097_SET_ANTI_ALIASING_CONTROL //NV4097_SET_CLIP_ID_TEST_ENABLE - std::chrono::time_point now = steady_clock::now(); - m_begin_time += (u32)std::chrono::duration_cast(now - then).count(); + m_begin_time += m_profiler.duration(); } void GLGSRender::flip(int buffer, bool emu_flip) diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 511699e2c2..0e1e426bfc 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -152,7 +152,7 @@ namespace gl::vertex_upload_info GLGSRender::set_vertex_buffer() { - std::chrono::time_point then = steady_clock::now(); + m_profiler.start(); //Write index buffers and count verts auto result = 
std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers)); @@ -254,7 +254,6 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() //Write all the data write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first); - std::chrono::time_point now = steady_clock::now(); - m_vertex_upload_time += std::chrono::duration_cast(now - then).count(); + m_vertex_upload_time += m_profiler.duration(); return upload_info; } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index a6c6befa07..3e7ac167ca 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -436,8 +436,8 @@ namespace rsx on_init_thread(); method_registers.init(); - g_dma_manager.init(); + m_profiler.enabled = !!g_cfg.video.overlay; if (!zcull_ctrl) { diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 045520cf9e..bd445af7c0 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -446,6 +446,9 @@ namespace rsx // Draw call stats u32 m_draw_calls = 0; + // Profiler + rsx::profiling_timer m_profiler; + public: RsxDmaControl* ctrl = nullptr; u32 restore_point = 0; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 7452f762cf..9c2ca2a336 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -829,7 +829,7 @@ void VKGSRender::check_heap_status(u32 flags) if (heap_critical) { - std::chrono::time_point submit_start = steady_clock::now(); + m_profiler.start(); frame_context_t *target_frame = nullptr; if (!m_queued_frames.empty()) @@ -863,8 +863,7 @@ void VKGSRender::check_heap_status(u32 flags) frame_context_cleanup(target_frame, true); } - std::chrono::time_point submit_end = steady_clock::now(); - m_flip_time += std::chrono::duration_cast(submit_end - submit_start).count(); + m_flip_time += m_profiler.duration(); } } @@ -954,7 +953,7 @@ void VKGSRender::begin() 
void VKGSRender::update_draw_state() { - std::chrono::time_point start = steady_clock::now(); + m_profiler.start(); float actual_line_width = rsx::method_registers.line_width(); vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width); @@ -1010,8 +1009,7 @@ void VKGSRender::update_draw_state() //TODO: Set up other render-state parameters into the program pipeline - std::chrono::time_point stop = steady_clock::now(); - m_setup_time += std::chrono::duration_cast(stop - start).count(); + m_setup_time += m_profiler.duration(); } void VKGSRender::begin_render_pass() @@ -1046,7 +1044,7 @@ void VKGSRender::close_render_pass() void VKGSRender::emit_geometry(u32 sub_index) { auto &draw_call = rsx::method_registers.current_draw_clause; - //std::chrono::time_point vertex_start = steady_clock::now(); + m_profiler.start(); if (sub_index == 0) { @@ -1087,8 +1085,7 @@ void VKGSRender::emit_geometry(u32 sub_index) return; } - //std::chrono::time_point vertex_end = steady_clock::now(); - //m_vertex_upload_time += std::chrono::duration_cast(vertex_end - vertex_start).count(); + m_vertex_upload_time += m_profiler.duration(); auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; auto volatile_buffer = m_volatile_attribute_storage ? 
m_volatile_attribute_storage->value : null_buffer_view->value; @@ -1145,8 +1142,7 @@ void VKGSRender::emit_geometry(u32 sub_index) // Bind the new set of descriptors for use with this draw call vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); - //std::chrono::time_point draw_start = steady_clock::now(); - //m_setup_time += std::chrono::duration_cast(draw_start - vertex_end).count(); + m_setup_time += m_profiler.duration(); if (!upload_info.index_info) { @@ -1190,8 +1186,7 @@ void VKGSRender::emit_geometry(u32 sub_index) } } - //std::chrono::time_point draw_end = steady_clock::now(); - //m_draw_time += std::chrono::duration_cast(draw_end - draw_start).count(); + m_draw_time += m_profiler.duration(); } void VKGSRender::end() @@ -1204,7 +1199,7 @@ void VKGSRender::end() return; } - std::chrono::time_point textures_start = steady_clock::now(); + m_profiler.start(); // Check for data casts // NOTE: This is deprecated and will be removed soon. 
The memory barrier invoked before rendering does this better @@ -1448,10 +1443,8 @@ void VKGSRender::end() } } - std::chrono::time_point textures_end = steady_clock::now(); - m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); + m_textures_upload_time += m_profiler.duration(); - std::chrono::time_point program_start = textures_end; if (!load_program()) { // Program is not ready, skip drawing this @@ -1469,10 +1462,7 @@ void VKGSRender::end() // Load program execution environment load_program_env(); - std::chrono::time_point program_end = steady_clock::now(); - m_setup_time += std::chrono::duration_cast(program_end - program_start).count(); - - textures_start = program_end; + m_setup_time += m_profiler.duration(); for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { @@ -1675,8 +1665,7 @@ void VKGSRender::end() } } - textures_end = steady_clock::now(); - m_textures_upload_time += std::chrono::duration_cast(textures_end - textures_start).count(); + m_textures_upload_time += m_profiler.duration(); u32 occlusion_id = 0; if (m_occlusion_query_active) @@ -3128,7 +3117,7 @@ void VKGSRender::flip(int buffer, bool emu_flip) reinitialize_swapchain(); } - std::chrono::time_point flip_start = steady_clock::now(); + m_profiler.start(); if (m_current_frame == &m_aux_frame_context) { @@ -3441,8 +3430,7 @@ void VKGSRender::flip(int buffer, bool emu_flip) queue_swap_request(); - std::chrono::time_point flip_end = steady_clock::now(); - m_flip_time = std::chrono::duration_cast(flip_end - flip_start).count(); + m_flip_time = m_profiler.duration(); //NOTE:Resource destruction is handled within the real swap handler diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index d10f7fc82e..e07179f714 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -8,6 +8,7 @@ #include #include +#include extern "C" { @@ -1082,4 +1083,32 @@ namespace rsx return _data ? 
_data + _size : nullptr;
 		}
 	};
+
+	// Lap timer for coarse profiling. While 'enabled' is false, start() does nothing and duration() returns 0.
+	struct profiling_timer
+	{
+		bool enabled = false;          // Timing is off until a caller sets this to true
+		steady_clock::time_point last; // Start of the current lap; the type steady_clock::now() yields
+		profiling_timer() = default;
+		// Opens a lap. Only one timestamp is kept, so calling this again restarts the lap in progress.
+		void start()
+		{
+			if (UNLIKELY(enabled))
+			{
+				last = steady_clock::now();
+			}
+		}
+		// Returns the time elapsed since the previous start()/duration() call and opens the next lap at 'now'.
+		s64 duration()
+		{
+			if (LIKELY(!enabled))
+			{
+				return 0ll;
+			}
+			// NOTE(review): the cast's target unit was missing in this copy of the patch; microseconds assumed - confirm.
+			auto old = last;
+			last = steady_clock::now();
+			return std::chrono::duration_cast<std::chrono::microseconds>(last - old).count();
+		}
+	};
 }