mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 08:21:29 +12:00
rsx: Tweaks
- Optimize get_surface_subresource
- Add check_program_status time to draw call setup statistics. It can slow down games significantly.
This commit is contained in:
parent
f7063bb57b
commit
31b07f2c5c
4 changed files with 105 additions and 60 deletions
|
@ -557,52 +557,63 @@ namespace rsx
|
||||||
if (surface_address > texaddr)
|
if (surface_address > texaddr)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
u32 offset = texaddr - surface_address;
|
const u32 offset = texaddr - surface_address;
|
||||||
if (texaddr >= surface_address)
|
if (offset == 0)
|
||||||
{
|
{
|
||||||
if (offset == 0)
|
*x = 0;
|
||||||
{
|
*y = 0;
|
||||||
is_subslice = true;
|
return true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
surface_format_info info;
|
surface_format_info info;
|
||||||
Traits::get_surface_info(surface, &info);
|
Traits::get_surface_info(surface, &info);
|
||||||
|
|
||||||
u32 range = info.rsx_pitch * info.surface_height;
|
u32 range = info.rsx_pitch * info.surface_height;
|
||||||
if (double_height) range *= 2;
|
if (double_height) range <<= 1;
|
||||||
|
|
||||||
if (offset < range)
|
if (offset < range)
|
||||||
|
{
|
||||||
|
const u32 y = (offset / info.rsx_pitch);
|
||||||
|
u32 x = (offset % info.rsx_pitch) / info.bpp;
|
||||||
|
|
||||||
|
if (scale_to_fit)
|
||||||
{
|
{
|
||||||
const u32 y = (offset / info.rsx_pitch);
|
const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch;
|
||||||
u32 x = (offset % info.rsx_pitch) / info.bpp;
|
x = (u32)((f32)x / x_scale);
|
||||||
|
|
||||||
if (scale_to_fit)
|
|
||||||
{
|
|
||||||
const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch;
|
|
||||||
x = (u32)((f32)x / x_scale);
|
|
||||||
}
|
|
||||||
|
|
||||||
x_offset = x;
|
|
||||||
y_offset = y;
|
|
||||||
|
|
||||||
if (double_height) y_offset /= 2;
|
|
||||||
is_subslice = true;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (is_subslice)
|
x_offset = x;
|
||||||
{
|
y_offset = y;
|
||||||
*x = x_offset;
|
|
||||||
*y = y_offset;
|
|
||||||
|
|
||||||
return true;
|
if (double_height) y_offset /= 2;
|
||||||
|
is_subslice = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_subslice)
|
||||||
|
{
|
||||||
|
*x = x_offset;
|
||||||
|
*y = y_offset;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Fast hit test
|
||||||
|
inline bool surface_overlaps_address_fast(surface_type surface, u32 surface_address, u32 texaddr)
|
||||||
|
{
|
||||||
|
if (surface_address > texaddr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const u32 offset = texaddr - surface_address;
|
||||||
|
const u32 range = surface->get_rsx_pitch() * surface->get_surface_height();
|
||||||
|
|
||||||
|
return (offset < range);
|
||||||
|
}
|
||||||
|
|
||||||
bool address_is_bound(u32 address, bool is_depth) const
|
bool address_is_bound(u32 address, bool is_depth) const
|
||||||
{
|
{
|
||||||
if (is_depth)
|
if (is_depth)
|
||||||
|
@ -629,7 +640,8 @@ namespace rsx
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool double_height = false)
|
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch,
|
||||||
|
bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool ignore_color_formats = false, bool double_height = false)
|
||||||
{
|
{
|
||||||
auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped)
|
auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped)
|
||||||
{
|
{
|
||||||
|
@ -638,12 +650,6 @@ namespace rsx
|
||||||
surface_format_info info;
|
surface_format_info info;
|
||||||
Traits::get_surface_info(surface, &info);
|
Traits::get_surface_info(surface, &info);
|
||||||
|
|
||||||
if (info.rsx_pitch != requested_pitch)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (requested_width == 0 || requested_height == 0)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
u16 real_width = requested_width;
|
u16 real_width = requested_width;
|
||||||
|
|
||||||
if (scale_to_fit)
|
if (scale_to_fit)
|
||||||
|
@ -696,26 +702,55 @@ namespace rsx
|
||||||
u16 w;
|
u16 w;
|
||||||
u16 h;
|
u16 h;
|
||||||
|
|
||||||
for (auto &tex_info : m_render_targets_storage)
|
if (!ignore_color_formats)
|
||||||
{
|
{
|
||||||
u32 this_address = std::get<0>(tex_info);
|
for (auto &tex_info : m_render_targets_storage)
|
||||||
surface = std::get<1>(tex_info).get();
|
{
|
||||||
|
const u32 this_address = std::get<0>(tex_info);
|
||||||
|
if (texaddr < this_address)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
|
surface = std::get<1>(tex_info).get();
|
||||||
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
|
if (surface->get_rsx_pitch() != requested_pitch)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (requested_width == 0 || requested_height == 0)
|
||||||
|
{
|
||||||
|
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
return{ surface, 0, 0, 0, 0, false, false, false };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
|
||||||
|
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ignore_depth_formats)
|
if (!ignore_depth_formats)
|
||||||
return{};
|
|
||||||
|
|
||||||
//Check depth surfaces for overlap
|
|
||||||
for (auto &tex_info : m_depth_stencil_storage)
|
|
||||||
{
|
{
|
||||||
u32 this_address = std::get<0>(tex_info);
|
//Check depth surfaces for overlap
|
||||||
surface = std::get<1>(tex_info).get();
|
for (auto &tex_info : m_depth_stencil_storage)
|
||||||
|
{
|
||||||
|
const u32 this_address = std::get<0>(tex_info);
|
||||||
|
if (texaddr < this_address)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
|
surface = std::get<1>(tex_info).get();
|
||||||
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
|
if (surface->get_rsx_pitch() != requested_pitch)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (requested_width == 0 || requested_height == 0)
|
||||||
|
{
|
||||||
|
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
return{ surface, 0, 0, 0, 0, false, true, false };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
|
||||||
|
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return{};
|
return{};
|
||||||
|
|
|
@ -1087,7 +1087,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
|
|
||||||
//Check if src/dst are parts of render targets
|
//Check if src/dst are parts of render targets
|
||||||
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, dst.compressed_y);
|
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, false, dst.compressed_y);
|
||||||
dst_is_render_target = dst_subres.surface != nullptr;
|
dst_is_render_target = dst_subres.surface != nullptr;
|
||||||
|
|
||||||
if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch)
|
if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch)
|
||||||
|
@ -1099,7 +1099,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
|
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
|
||||||
auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, src.compressed_y);
|
auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, false, src.compressed_y);
|
||||||
src_is_render_target = src_subres.surface != nullptr;
|
src_is_render_target = src_subres.surface != nullptr;
|
||||||
|
|
||||||
if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch)
|
if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch)
|
||||||
|
|
|
@ -318,12 +318,17 @@ namespace
|
||||||
|
|
||||||
void GLGSRender::end()
|
void GLGSRender::end()
|
||||||
{
|
{
|
||||||
|
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
|
||||||
|
|
||||||
if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state())
|
if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state())
|
||||||
{
|
{
|
||||||
rsx::thread::end();
|
rsx::thread::end();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
||||||
|
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
||||||
|
|
||||||
if (manually_flush_ring_buffers)
|
if (manually_flush_ring_buffers)
|
||||||
{
|
{
|
||||||
//Use approximations to reserve space. This path is mostly for debug purposes anyway
|
//Use approximations to reserve space. This path is mostly for debug purposes anyway
|
||||||
|
@ -964,7 +969,7 @@ bool GLGSRender::check_program_state()
|
||||||
if (dirty_framebuffer)
|
if (dirty_framebuffer)
|
||||||
return std::make_tuple(false, 0);
|
return std::make_tuple(false, 0);
|
||||||
|
|
||||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
|
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
|
||||||
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
||||||
return std::make_tuple(false, 0);
|
return std::make_tuple(false, 0);
|
||||||
|
|
||||||
|
|
|
@ -995,6 +995,8 @@ void VKGSRender::end()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
|
||||||
|
|
||||||
//Load program here since it is dependent on vertex state
|
//Load program here since it is dependent on vertex state
|
||||||
if (!check_program_status())
|
if (!check_program_status())
|
||||||
{
|
{
|
||||||
|
@ -1003,14 +1005,17 @@ void VKGSRender::end()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
||||||
|
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
||||||
|
|
||||||
//Programs data is dependent on vertex state
|
//Programs data is dependent on vertex state
|
||||||
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
|
std::chrono::time_point<steady_clock> vertex_start = state_check_end;
|
||||||
auto upload_info = upload_vertex_data();
|
auto upload_info = upload_vertex_data();
|
||||||
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
||||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
||||||
|
|
||||||
//Load program
|
//Load program
|
||||||
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
|
std::chrono::time_point<steady_clock> program_start = vertex_end;
|
||||||
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
|
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
|
||||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
||||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
||||||
|
@ -1841,7 +1846,7 @@ bool VKGSRender::check_program_status()
|
||||||
if (dirty_framebuffer)
|
if (dirty_framebuffer)
|
||||||
return std::make_tuple(false, 0);
|
return std::make_tuple(false, 0);
|
||||||
|
|
||||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
|
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
|
||||||
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
||||||
return std::make_tuple(false, 0);
|
return std::make_tuple(false, 0);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue