rsx: Tweaks

- Optimize get_surface_subresource
- Add check_program_status time to the draw call setup statistics, since it can slow down games significantly
This commit is contained in:
kd-11 2017-10-29 19:34:55 +03:00
parent f7063bb57b
commit 31b07f2c5c
4 changed files with 105 additions and 60 deletions

View file

@ -557,52 +557,63 @@ namespace rsx
if (surface_address > texaddr) if (surface_address > texaddr)
return false; return false;
u32 offset = texaddr - surface_address; const u32 offset = texaddr - surface_address;
if (texaddr >= surface_address) if (offset == 0)
{ {
if (offset == 0) *x = 0;
{ *y = 0;
is_subslice = true; return true;
} }
else else
{ {
surface_format_info info; surface_format_info info;
Traits::get_surface_info(surface, &info); Traits::get_surface_info(surface, &info);
u32 range = info.rsx_pitch * info.surface_height; u32 range = info.rsx_pitch * info.surface_height;
if (double_height) range *= 2; if (double_height) range <<= 1;
if (offset < range) if (offset < range)
{
const u32 y = (offset / info.rsx_pitch);
u32 x = (offset % info.rsx_pitch) / info.bpp;
if (scale_to_fit)
{ {
const u32 y = (offset / info.rsx_pitch); const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch;
u32 x = (offset % info.rsx_pitch) / info.bpp; x = (u32)((f32)x / x_scale);
if (scale_to_fit)
{
const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch;
x = (u32)((f32)x / x_scale);
}
x_offset = x;
y_offset = y;
if (double_height) y_offset /= 2;
is_subslice = true;
} }
}
if (is_subslice) x_offset = x;
{ y_offset = y;
*x = x_offset;
*y = y_offset;
return true; if (double_height) y_offset /= 2;
is_subslice = true;
} }
} }
if (is_subslice)
{
*x = x_offset;
*y = y_offset;
return true;
}
return false; return false;
} }
//Quick hit test: reports whether texaddr falls inside the span that begins at
//surface_address and extends for (rsx_pitch * surface_height) of the given surface.
//An address located before the surface base is never considered a hit.
inline bool surface_overlaps_address_fast(surface_type surface, u32 surface_address, u32 texaddr)
{
	if (texaddr >= surface_address)
	{
		const u32 distance = texaddr - surface_address;
		const u32 surface_span = surface->get_rsx_pitch() * surface->get_surface_height();
		return (distance < surface_span);
	}

	return false;
}
bool address_is_bound(u32 address, bool is_depth) const bool address_is_bound(u32 address, bool is_depth) const
{ {
if (is_depth) if (is_depth)
@ -629,7 +640,8 @@ namespace rsx
return true; return true;
} }
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool double_height = false) surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch,
bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool ignore_color_formats = false, bool double_height = false)
{ {
auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped) auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped)
{ {
@ -638,12 +650,6 @@ namespace rsx
surface_format_info info; surface_format_info info;
Traits::get_surface_info(surface, &info); Traits::get_surface_info(surface, &info);
if (info.rsx_pitch != requested_pitch)
return false;
if (requested_width == 0 || requested_height == 0)
return true;
u16 real_width = requested_width; u16 real_width = requested_width;
if (scale_to_fit) if (scale_to_fit)
@ -696,26 +702,55 @@ namespace rsx
u16 w; u16 w;
u16 h; u16 h;
for (auto &tex_info : m_render_targets_storage) if (!ignore_color_formats)
{ {
u32 this_address = std::get<0>(tex_info); for (auto &tex_info : m_render_targets_storage)
surface = std::get<1>(tex_info).get(); {
const u32 this_address = std::get<0>(tex_info);
if (texaddr < this_address)
continue;
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) surface = std::get<1>(tex_info).get();
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped }; if (surface->get_rsx_pitch() != requested_pitch)
continue;
if (requested_width == 0 || requested_height == 0)
{
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
continue;
else
return{ surface, 0, 0, 0, 0, false, false, false };
}
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
}
} }
if (ignore_depth_formats) if (!ignore_depth_formats)
return{};
//Check depth surfaces for overlap
for (auto &tex_info : m_depth_stencil_storage)
{ {
u32 this_address = std::get<0>(tex_info); //Check depth surfaces for overlap
surface = std::get<1>(tex_info).get(); for (auto &tex_info : m_depth_stencil_storage)
{
const u32 this_address = std::get<0>(tex_info);
if (texaddr < this_address)
continue;
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) surface = std::get<1>(tex_info).get();
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped }; if (surface->get_rsx_pitch() != requested_pitch)
continue;
if (requested_width == 0 || requested_height == 0)
{
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
continue;
else
return{ surface, 0, 0, 0, 0, false, true, false };
}
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
}
} }
return{}; return{};

View file

@ -1087,7 +1087,7 @@ namespace rsx
} }
//Check if src/dst are parts of render targets //Check if src/dst are parts of render targets
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, dst.compressed_y); auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, false, dst.compressed_y);
dst_is_render_target = dst_subres.surface != nullptr; dst_is_render_target = dst_subres.surface != nullptr;
if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch) if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch)
@ -1099,7 +1099,7 @@ namespace rsx
} }
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, src.compressed_y); auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, false, src.compressed_y);
src_is_render_target = src_subres.surface != nullptr; src_is_render_target = src_subres.surface != nullptr;
if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch) if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch)

View file

@ -318,12 +318,17 @@ namespace
void GLGSRender::end() void GLGSRender::end()
{ {
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state()) if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state())
{ {
rsx::thread::end(); rsx::thread::end();
return; return;
} }
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
{ {
//Use approximations to reserve space. This path is mostly for debug purposes anyway //Use approximations to reserve space. This path is mostly for debug purposes anyway
@ -964,7 +969,7 @@ bool GLGSRender::check_program_state()
if (dirty_framebuffer) if (dirty_framebuffer)
return std::make_tuple(false, 0); return std::make_tuple(false, 0);
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch()); auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
if (!rsc.surface || rsc.is_depth_surface != is_depth) if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0); return std::make_tuple(false, 0);

View file

@ -995,6 +995,8 @@ void VKGSRender::end()
return; return;
} }
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
//Load program here since it is dependent on vertex state //Load program here since it is dependent on vertex state
if (!check_program_status()) if (!check_program_status())
{ {
@ -1003,14 +1005,17 @@ void VKGSRender::end()
return; return;
} }
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
//Programs data is dependent on vertex state //Programs data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now(); std::chrono::time_point<steady_clock> vertex_start = state_check_end;
auto upload_info = upload_vertex_data(); auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now(); std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count(); m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
//Load program //Load program
std::chrono::time_point<steady_clock> program_start = steady_clock::now(); std::chrono::time_point<steady_clock> program_start = vertex_end;
load_program(std::get<2>(upload_info), std::get<3>(upload_info)); load_program(std::get<2>(upload_info), std::get<3>(upload_info));
std::chrono::time_point<steady_clock> program_stop = steady_clock::now(); std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count(); m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
@ -1841,7 +1846,7 @@ bool VKGSRender::check_program_status()
if (dirty_framebuffer) if (dirty_framebuffer)
return std::make_tuple(false, 0); return std::make_tuple(false, 0);
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch()); auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
if (!rsc.surface || rsc.is_depth_surface != is_depth) if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0); return std::make_tuple(false, 0);