mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-10 00:41:26 +12:00
rsx: Improve profiling setup
- Avoid spamming QPC when not needed
- Free performance when debug overlay is not enabled
This commit is contained in:
parent
b893a75002
commit
d26b25816d
6 changed files with 58 additions and 51 deletions
|
@ -183,7 +183,7 @@ void GLGSRender::begin()
|
|||
|
||||
void GLGSRender::end()
|
||||
{
|
||||
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
if (skip_frame || !framebuffer_status_valid ||
|
||||
(conditional_render_enabled && conditional_render_test_failed))
|
||||
|
@ -193,8 +193,7 @@ void GLGSRender::end()
|
|||
return;
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
||||
m_begin_time += m_profiler.duration();
|
||||
|
||||
const auto do_heap_cleanup = [this]()
|
||||
{
|
||||
|
@ -233,7 +232,7 @@ void GLGSRender::end()
|
|||
|
||||
// Load textures
|
||||
{
|
||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
std::lock_guard lock(m_sampler_mutex);
|
||||
bool update_framebuffer_sourced = false;
|
||||
|
@ -296,12 +295,9 @@ void GLGSRender::end()
|
|||
|
||||
m_samplers_dirty.store(false);
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
m_textures_upload_time += m_profiler.duration();
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
|
||||
|
||||
// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
|
||||
// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
|
||||
if (!load_program())
|
||||
|
@ -317,12 +313,9 @@ void GLGSRender::end()
|
|||
// Load program execution environment
|
||||
load_program_env();
|
||||
|
||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
||||
m_begin_time += m_profiler.duration();
|
||||
|
||||
//Bind textures and resolve external copy operations
|
||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (current_fp_metadata.referenced_textures_mask & (1 << i))
|
||||
|
@ -394,10 +387,7 @@ void GLGSRender::end()
|
|||
}
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
|
||||
std::chrono::time_point<steady_clock> draw_start = textures_end;
|
||||
m_textures_upload_time += m_profiler.duration();
|
||||
|
||||
// Optionally do memory synchronization if the texture stage has not yet triggered this
|
||||
if (true)//g_cfg.video.strict_rendering_mode)
|
||||
|
@ -647,8 +637,7 @@ void GLGSRender::end()
|
|||
m_fragment_constants_buffer->notify();
|
||||
m_transform_constants_buffer->notify();
|
||||
|
||||
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
||||
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
|
||||
m_draw_time += m_profiler.duration();
|
||||
|
||||
rsx::thread::end();
|
||||
}
|
||||
|
@ -1456,7 +1445,7 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
|
|||
|
||||
void GLGSRender::update_draw_state()
|
||||
{
|
||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
bool color_mask_b = rsx::method_registers.color_mask_b();
|
||||
bool color_mask_g = rsx::method_registers.color_mask_g();
|
||||
|
@ -1573,8 +1562,7 @@ void GLGSRender::update_draw_state()
|
|||
//NV4097_SET_ANTI_ALIASING_CONTROL
|
||||
//NV4097_SET_CLIP_ID_TEST_ENABLE
|
||||
|
||||
std::chrono::time_point<steady_clock> now = steady_clock::now();
|
||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
m_begin_time += m_profiler.duration();
|
||||
}
|
||||
|
||||
void GLGSRender::flip(int buffer, bool emu_flip)
|
||||
|
|
|
@ -152,7 +152,7 @@ namespace
|
|||
|
||||
gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
{
|
||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
//Write index buffers and count verts
|
||||
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
|
||||
|
@ -254,7 +254,6 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
|||
//Write all the data
|
||||
write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first);
|
||||
|
||||
std::chrono::time_point<steady_clock> now = steady_clock::now();
|
||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
m_vertex_upload_time += m_profiler.duration();
|
||||
return upload_info;
|
||||
}
|
||||
|
|
|
@ -436,8 +436,8 @@ namespace rsx
|
|||
on_init_thread();
|
||||
|
||||
method_registers.init();
|
||||
|
||||
g_dma_manager.init();
|
||||
m_profiler.enabled = !!g_cfg.video.overlay;
|
||||
|
||||
if (!zcull_ctrl)
|
||||
{
|
||||
|
|
|
@ -446,6 +446,9 @@ namespace rsx
|
|||
// Draw call stats
|
||||
u32 m_draw_calls = 0;
|
||||
|
||||
// Profiler
|
||||
rsx::profiling_timer m_profiler;
|
||||
|
||||
public:
|
||||
RsxDmaControl* ctrl = nullptr;
|
||||
u32 restore_point = 0;
|
||||
|
|
|
@ -829,7 +829,7 @@ void VKGSRender::check_heap_status(u32 flags)
|
|||
|
||||
if (heap_critical)
|
||||
{
|
||||
std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
frame_context_t *target_frame = nullptr;
|
||||
if (!m_queued_frames.empty())
|
||||
|
@ -863,8 +863,7 @@ void VKGSRender::check_heap_status(u32 flags)
|
|||
frame_context_cleanup(target_frame, true);
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
|
||||
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
|
||||
m_flip_time += m_profiler.duration();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -954,7 +953,7 @@ void VKGSRender::begin()
|
|||
|
||||
void VKGSRender::update_draw_state()
|
||||
{
|
||||
std::chrono::time_point<steady_clock> start = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
float actual_line_width = rsx::method_registers.line_width();
|
||||
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
|
||||
|
@ -1010,8 +1009,7 @@ void VKGSRender::update_draw_state()
|
|||
|
||||
//TODO: Set up other render-state parameters into the program pipeline
|
||||
|
||||
std::chrono::time_point<steady_clock> stop = steady_clock::now();
|
||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
|
||||
m_setup_time += m_profiler.duration();
|
||||
}
|
||||
|
||||
void VKGSRender::begin_render_pass()
|
||||
|
@ -1046,7 +1044,7 @@ void VKGSRender::close_render_pass()
|
|||
void VKGSRender::emit_geometry(u32 sub_index)
|
||||
{
|
||||
auto &draw_call = rsx::method_registers.current_draw_clause;
|
||||
//std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
if (sub_index == 0)
|
||||
{
|
||||
|
@ -1087,8 +1085,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
|||
return;
|
||||
}
|
||||
|
||||
//std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
||||
//m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
||||
m_vertex_upload_time += m_profiler.duration();
|
||||
|
||||
auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
|
||||
auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
|
||||
|
@ -1145,8 +1142,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
|||
// Bind the new set of descriptors for use with this draw call
|
||||
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
|
||||
|
||||
//std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
||||
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();
|
||||
m_setup_time += m_profiler.duration();
|
||||
|
||||
if (!upload_info.index_info)
|
||||
{
|
||||
|
@ -1190,8 +1186,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
|
|||
}
|
||||
}
|
||||
|
||||
//std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
||||
//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
|
||||
m_draw_time += m_profiler.duration();
|
||||
}
|
||||
|
||||
void VKGSRender::end()
|
||||
|
@ -1204,7 +1199,7 @@ void VKGSRender::end()
|
|||
return;
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
// Check for data casts
|
||||
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
|
||||
|
@ -1448,10 +1443,8 @@ void VKGSRender::end()
|
|||
}
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
|
||||
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
m_textures_upload_time += m_profiler.duration();
|
||||
|
||||
std::chrono::time_point<steady_clock> program_start = textures_end;
|
||||
if (!load_program())
|
||||
{
|
||||
// Program is not ready, skip drawing this
|
||||
|
@ -1469,10 +1462,7 @@ void VKGSRender::end()
|
|||
// Load program execution environment
|
||||
load_program_env();
|
||||
|
||||
std::chrono::time_point<steady_clock> program_end = steady_clock::now();
|
||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
|
||||
|
||||
textures_start = program_end;
|
||||
m_setup_time += m_profiler.duration();
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
|
@ -1675,8 +1665,7 @@ void VKGSRender::end()
|
|||
}
|
||||
}
|
||||
|
||||
textures_end = steady_clock::now();
|
||||
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||
m_textures_upload_time += m_profiler.duration();
|
||||
|
||||
u32 occlusion_id = 0;
|
||||
if (m_occlusion_query_active)
|
||||
|
@ -3128,7 +3117,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
|
|||
reinitialize_swapchain();
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> flip_start = steady_clock::now();
|
||||
m_profiler.start();
|
||||
|
||||
if (m_current_frame == &m_aux_frame_context)
|
||||
{
|
||||
|
@ -3441,8 +3430,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
|
|||
|
||||
queue_swap_request();
|
||||
|
||||
std::chrono::time_point<steady_clock> flip_end = steady_clock::now();
|
||||
m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count();
|
||||
m_flip_time = m_profiler.duration();
|
||||
|
||||
//NOTE:Resource destruction is handled within the real swap handler
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include <memory>
|
||||
#include <bitset>
|
||||
#include <chrono>
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
@ -1082,4 +1083,32 @@ namespace rsx
|
|||
return _data ? _data + _size : nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
struct profiling_timer
|
||||
{
|
||||
bool enabled = false;
|
||||
std::chrono::time_point<steady_clock> last;
|
||||
|
||||
profiling_timer() = default;
|
||||
|
||||
void start()
|
||||
{
|
||||
if (UNLIKELY(enabled))
|
||||
{
|
||||
last = steady_clock::now();
|
||||
}
|
||||
}
|
||||
|
||||
s64 duration()
|
||||
{
|
||||
if (LIKELY(!enabled))
|
||||
{
|
||||
return 0ll;
|
||||
}
|
||||
|
||||
auto old = last;
|
||||
last = steady_clock::now();
|
||||
return std::chrono::duration_cast<std::chrono::microseconds>(last - old).count();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue