mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-10 00:41:26 +12:00
rsx: Improve profiling setup
- Avoid spamming QPC (QueryPerformanceCounter) when not needed
- Free performance when debug overlay is not enabled
This commit is contained in:
parent
b893a75002
commit
d26b25816d
6 changed files with 58 additions and 51 deletions
|
@ -183,7 +183,7 @@ void GLGSRender::begin()
|
||||||
|
|
||||||
void GLGSRender::end()
|
void GLGSRender::end()
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
if (skip_frame || !framebuffer_status_valid ||
|
if (skip_frame || !framebuffer_status_valid ||
|
||||||
(conditional_render_enabled && conditional_render_test_failed))
|
(conditional_render_enabled && conditional_render_test_failed))
|
||||||
|
@ -193,8 +193,7 @@ void GLGSRender::end()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
m_begin_time += m_profiler.duration();
|
||||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
|
||||||
|
|
||||||
const auto do_heap_cleanup = [this]()
|
const auto do_heap_cleanup = [this]()
|
||||||
{
|
{
|
||||||
|
@ -233,7 +232,7 @@ void GLGSRender::end()
|
||||||
|
|
||||||
// Load textures
|
// Load textures
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
std::lock_guard lock(m_sampler_mutex);
|
std::lock_guard lock(m_sampler_mutex);
|
||||||
bool update_framebuffer_sourced = false;
|
bool update_framebuffer_sourced = false;
|
||||||
|
@ -296,12 +295,9 @@ void GLGSRender::end()
|
||||||
|
|
||||||
m_samplers_dirty.store(false);
|
m_samplers_dirty.store(false);
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
m_textures_upload_time += m_profiler.duration();
|
||||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
|
|
||||||
|
|
||||||
// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
|
// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
|
||||||
// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
|
// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
|
||||||
if (!load_program())
|
if (!load_program())
|
||||||
|
@ -317,12 +313,9 @@ void GLGSRender::end()
|
||||||
// Load program execution environment
|
// Load program execution environment
|
||||||
load_program_env();
|
load_program_env();
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
m_begin_time += m_profiler.duration();
|
||||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
|
||||||
|
|
||||||
//Bind textures and resolve external copy operations
|
//Bind textures and resolve external copy operations
|
||||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
|
||||||
|
|
||||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||||
{
|
{
|
||||||
if (current_fp_metadata.referenced_textures_mask & (1 << i))
|
if (current_fp_metadata.referenced_textures_mask & (1 << i))
|
||||||
|
@ -394,10 +387,7 @@ void GLGSRender::end()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
m_textures_upload_time += m_profiler.duration();
|
||||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> draw_start = textures_end;
|
|
||||||
|
|
||||||
// Optionally do memory synchronization if the texture stage has not yet triggered this
|
// Optionally do memory synchronization if the texture stage has not yet triggered this
|
||||||
if (true)//g_cfg.video.strict_rendering_mode)
|
if (true)//g_cfg.video.strict_rendering_mode)
|
||||||
|
@ -647,8 +637,7 @@ void GLGSRender::end()
|
||||||
m_fragment_constants_buffer->notify();
|
m_fragment_constants_buffer->notify();
|
||||||
m_transform_constants_buffer->notify();
|
m_transform_constants_buffer->notify();
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
m_draw_time += m_profiler.duration();
|
||||||
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
|
|
||||||
|
|
||||||
rsx::thread::end();
|
rsx::thread::end();
|
||||||
}
|
}
|
||||||
|
@ -1456,7 +1445,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
|
||||||
|
|
||||||
void GLGSRender::update_draw_state()
|
void GLGSRender::update_draw_state()
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
bool color_mask_b = rsx::method_registers.color_mask_b();
|
bool color_mask_b = rsx::method_registers.color_mask_b();
|
||||||
bool color_mask_g = rsx::method_registers.color_mask_g();
|
bool color_mask_g = rsx::method_registers.color_mask_g();
|
||||||
|
@ -1573,8 +1562,7 @@ void GLGSRender::update_draw_state()
|
||||||
//NV4097_SET_ANTI_ALIASING_CONTROL
|
//NV4097_SET_ANTI_ALIASING_CONTROL
|
||||||
//NV4097_SET_CLIP_ID_TEST_ENABLE
|
//NV4097_SET_CLIP_ID_TEST_ENABLE
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> now = steady_clock::now();
|
m_begin_time += m_profiler.duration();
|
||||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GLGSRender::flip(int buffer, bool emu_flip)
|
void GLGSRender::flip(int buffer, bool emu_flip)
|
||||||
|
|
|
@ -152,7 +152,7 @@ namespace
|
||||||
|
|
||||||
gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
//Write index buffers and count verts
|
//Write index buffers and count verts
|
||||||
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
|
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
|
||||||
|
@ -254,7 +254,6 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||||
//Write all the data
|
//Write all the data
|
||||||
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
|
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> now = steady_clock::now();
|
m_vertex_upload_time += m_profiler.duration();
|
||||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
|
||||||
return upload_info;
|
return upload_info;
|
||||||
}
|
}
|
||||||
|
|
|
@ -436,8 +436,8 @@ namespace rsx
|
||||||
on_init_thread();
|
on_init_thread();
|
||||||
|
|
||||||
method_registers.init();
|
method_registers.init();
|
||||||
|
|
||||||
g_dma_manager.init();
|
g_dma_manager.init();
|
||||||
|
m_profiler.enabled = !!g_cfg.video.overlay;
|
||||||
|
|
||||||
if (!zcull_ctrl)
|
if (!zcull_ctrl)
|
||||||
{
|
{
|
||||||
|
|
|
@ -446,6 +446,9 @@ namespace rsx
|
||||||
// Draw call stats
|
// Draw call stats
|
||||||
u32 m_draw_calls = 0;
|
u32 m_draw_calls = 0;
|
||||||
|
|
||||||
|
// Profiler
|
||||||
|
rsx::profiling_timer m_profiler;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RsxDmaControl* ctrl = nullptr;
|
RsxDmaControl* ctrl = nullptr;
|
||||||
u32 restore_point = 0;
|
u32 restore_point = 0;
|
||||||
|
|
|
@ -829,7 +829,7 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||||
|
|
||||||
if (heap_critical)
|
if (heap_critical)
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
frame_context_t *target_frame = nullptr;
|
frame_context_t *target_frame = nullptr;
|
||||||
if (!m_queued_frames.empty())
|
if (!m_queued_frames.empty())
|
||||||
|
@ -863,8 +863,7 @@ void VKGSRender::check_heap_status(u32 flags)
|
||||||
frame_context_cleanup(target_frame, true);
|
frame_context_cleanup(target_frame, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
|
m_flip_time += m_profiler.duration();
|
||||||
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -954,7 +953,7 @@ void VKGSRender::begin()
|
||||||
|
|
||||||
void VKGSRender::update_draw_state()
|
void VKGSRender::update_draw_state()
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> start = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
float actual_line_width = rsx::method_registers.line_width();
|
float actual_line_width = rsx::method_registers.line_width();
|
||||||
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
|
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
|
||||||
|
@ -1010,8 +1009,7 @@ void VKGSRender::update_draw_state()
|
||||||
|
|
||||||
//TODO: Set up other render-state parameters into the program pipeline
|
//TODO: Set up other render-state parameters into the program pipeline
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> stop = steady_clock::now();
|
m_setup_time += m_profiler.duration();
|
||||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKGSRender::begin_render_pass()
|
void VKGSRender::begin_render_pass()
|
||||||
|
@ -1046,7 +1044,7 @@ void VKGSRender::close_render_pass()
|
||||||
void VKGSRender::emit_geometry(u32 sub_index)
|
void VKGSRender::emit_geometry(u32 sub_index)
|
||||||
{
|
{
|
||||||
auto &draw_call = rsx::method_registers.current_draw_clause;
|
auto &draw_call = rsx::method_registers.current_draw_clause;
|
||||||
//std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
if (sub_index == 0)
|
if (sub_index == 0)
|
||||||
{
|
{
|
||||||
|
@ -1087,8 +1085,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
m_vertex_upload_time += m_profiler.duration();
|
||||||
//m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
|
||||||
|
|
||||||
auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
|
auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
|
||||||
auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
|
auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
|
||||||
|
@ -1145,8 +1142,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||||
// Bind the new set of descriptors for use with this draw call
|
// Bind the new set of descriptors for use with this draw call
|
||||||
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
|
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
|
||||||
|
|
||||||
//std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
m_setup_time += m_profiler.duration();
|
||||||
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();
|
|
||||||
|
|
||||||
if (!upload_info.index_info)
|
if (!upload_info.index_info)
|
||||||
{
|
{
|
||||||
|
@ -1190,8 +1186,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
m_draw_time += m_profiler.duration();
|
||||||
//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKGSRender::end()
|
void VKGSRender::end()
|
||||||
|
@ -1204,7 +1199,7 @@ void VKGSRender::end()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
// Check for data casts
|
// Check for data casts
|
||||||
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
|
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
|
||||||
|
@ -1448,10 +1443,8 @@ void VKGSRender::end()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
m_textures_upload_time += m_profiler.duration();
|
||||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> program_start = textures_end;
|
|
||||||
if (!load_program())
|
if (!load_program())
|
||||||
{
|
{
|
||||||
// Program is not ready, skip drawing this
|
// Program is not ready, skip drawing this
|
||||||
|
@ -1469,10 +1462,7 @@ void VKGSRender::end()
|
||||||
// Load program execution environment
|
// Load program execution environment
|
||||||
load_program_env();
|
load_program_env();
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> program_end = steady_clock::now();
|
m_setup_time += m_profiler.duration();
|
||||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
|
|
||||||
|
|
||||||
textures_start = program_end;
|
|
||||||
|
|
||||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||||
{
|
{
|
||||||
|
@ -1675,8 +1665,7 @@ void VKGSRender::end()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
textures_end = steady_clock::now();
|
m_textures_upload_time += m_profiler.duration();
|
||||||
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
|
||||||
|
|
||||||
u32 occlusion_id = 0;
|
u32 occlusion_id = 0;
|
||||||
if (m_occlusion_query_active)
|
if (m_occlusion_query_active)
|
||||||
|
@ -3128,7 +3117,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
|
||||||
reinitialize_swapchain();
|
reinitialize_swapchain();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> flip_start = steady_clock::now();
|
m_profiler.start();
|
||||||
|
|
||||||
if (m_current_frame == &m_aux_frame_context)
|
if (m_current_frame == &m_aux_frame_context)
|
||||||
{
|
{
|
||||||
|
@ -3441,8 +3430,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
|
||||||
|
|
||||||
queue_swap_request();
|
queue_swap_request();
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> flip_end = steady_clock::now();
|
m_flip_time = m_profiler.duration();
|
||||||
m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count();
|
|
||||||
|
|
||||||
//NOTE:Resource destruction is handled within the real swap handler
|
//NOTE:Resource destruction is handled within the real swap handler
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
|
@ -1082,4 +1083,32 @@ namespace rsx
|
||||||
return _data ? _data + _size : nullptr;
|
return _data ? _data + _size : nullptr;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct profiling_timer
|
||||||
|
{
|
||||||
|
bool enabled = false;
|
||||||
|
std::chrono::time_point<steady_clock> last;
|
||||||
|
|
||||||
|
profiling_timer() = default;
|
||||||
|
|
||||||
|
void start()
|
||||||
|
{
|
||||||
|
if (UNLIKELY(enabled))
|
||||||
|
{
|
||||||
|
last = steady_clock::now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s64 duration()
|
||||||
|
{
|
||||||
|
if (LIKELY(!enabled))
|
||||||
|
{
|
||||||
|
return 0ll;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto old = last;
|
||||||
|
last = steady_clock::now();
|
||||||
|
return std::chrono::duration_cast<std::chrono::microseconds>(last - old).count();
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue