rsx/texture_cache: Improve framebuffer memory locking when WCB/WDB is not enabled

- Adds a new invalidation cause (committed_as_fbo) that removes non-framebuffer sections inside the framebuffer range
This commit is contained in:
kd-11 2019-02-27 21:26:22 +03:00 committed by kd-11
parent 563e205a72
commit 10dc3dadee
5 changed files with 59 additions and 35 deletions

View file

@ -535,7 +535,7 @@ namespace rsx
// Sanity checks // Sanity checks
AUDIT(exclusion_range.is_page_range()); AUDIT(exclusion_range.is_page_range());
AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause == invalidation_cause::superseded_by_fbo || !exclusion_range.overlaps(data.fault_range)); AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause.skip_fbos() || !exclusion_range.overlaps(data.fault_range));
// Apply exclusion // Apply exclusion
ranges_to_unprotect.exclude(exclusion_range); ranges_to_unprotect.exclude(exclusion_range);
@ -792,7 +792,7 @@ namespace rsx
// Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored // Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
(invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !overlaps_fault_range) || (invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !overlaps_fault_range) ||
// HACK: When being superseded by an fbo, we preserve other overlapped fbos unless the start addresses match // HACK: When being superseded by (or committed as) an fbo, we preserve other overlapped fbos unless the start addresses match
(overlaps_fault_range && cause == invalidation_cause::superseded_by_fbo && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start) (overlaps_fault_range && cause.skip_fbos() && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start)
) )
{ {
// False positive // False positive
@ -874,7 +874,7 @@ namespace rsx
else else
{ {
// This is a read and all overlapping sections were RO and were excluded (except for cause == superseded_by_fbo) // This is a read and all overlapping sections were RO and were excluded (except when cause.skip_fbos(), i.e. superseded_by_fbo or committed_as_fbo)
AUDIT(cause == invalidation_cause::superseded_by_fbo || cause.is_read() && !result.sections_to_exclude.empty()); AUDIT(cause.skip_fbos() || cause.is_read() && !result.sections_to_exclude.empty());
// We did not handle this violation // We did not handle this violation
result.clear_sections(); result.clear_sections();
@ -1262,7 +1262,7 @@ namespace rsx
return; return;
std::lock_guard lock(m_cache_mutex); std::lock_guard lock(m_cache_mutex);
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...); invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::committed_as_fbo, std::forward<Args>(extras)...);
} }
void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags) void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags)
@ -1640,13 +1640,13 @@ namespace rsx
if (limit_x > slice_w) if (limit_x > slice_w)
{ {
dst_width = (slice_w - dst_x); dst_width = (slice_w - dst_x);
src_width = dst_width / scale_x; src_width = u16(dst_width / scale_x);
} }
if (limit_y > slice_h) if (limit_y > slice_h)
{ {
dst_height = (slice_h - dst_y); dst_height = (slice_h - dst_y);
src_height = dst_height / scale_y; src_height = u16(dst_height / scale_y);
} }
} }
@ -2060,7 +2060,7 @@ namespace rsx
u16 internal_height = required_surface_height; u16 internal_height = required_surface_height;
get_native_dimensions(internal_width, internal_height, last.surface); get_native_dimensions(internal_width, internal_height, last.surface);
if (last.width == internal_width && last.height == internal_height) if (last.width >= internal_width && last.height >= internal_height)
{ {
verify(HERE), last.surface->test(); verify(HERE), last.surface->test();
return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth, return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth,

View file

@ -42,7 +42,8 @@ namespace rsx
}; };
struct invalidation_cause { struct invalidation_cause {
enum enum_type { enum enum_type
{
invalid = 0, invalid = 0,
read, read,
deferred_read, deferred_read,
@ -50,7 +51,8 @@ namespace rsx
deferred_write, deferred_write,
unmap, // fault range is being unmapped unmap, // fault range is being unmapped
reprotect, // we are going to reprotect the fault range reprotect, // we are going to reprotect the fault range
superseded_by_fbo // used by texture_cache::locked_memory_region superseded_by_fbo, // used by texture_cache::locked_memory_region
committed_as_fbo // same as superseded_by_fbo but without locking or preserving page flags
} cause; } cause;
constexpr bool valid() const constexpr bool valid() const
@ -82,7 +84,13 @@ namespace rsx
return (cause == unmap || cause == reprotect || cause == superseded_by_fbo); return (cause == unmap || cause == reprotect || cause == superseded_by_fbo);
} }
bool skip_flush() const constexpr bool skip_fbos() const
{
AUDIT(valid());
return (cause == superseded_by_fbo || cause == committed_as_fbo);
}
constexpr bool skip_flush() const
{ {
AUDIT(valid()); AUDIT(valid());
return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo); return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo);
@ -1401,20 +1409,16 @@ namespace rsx
} }
else else
{ {
ASSERT(valid_length % rsx_pitch == 0);
u8 *_src = src; u8 *_src = src;
u32 _dst = dst; u32 _dst = dst;
const auto num_rows = valid_length / rsx_pitch;
const auto num_exclusions = flush_exclusions.size(); const auto num_exclusions = flush_exclusions.size();
if (num_exclusions > 0) if (num_exclusions > 0)
{ {
LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d rows), performance might suffer", num_exclusions, num_rows); LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d bytes), performance might suffer", num_exclusions, valid_length);
} }
for (u32 row = 0; row < num_rows; ++row) for (s32 remaining = s32(valid_length); remaining > 0; remaining -= rsx_pitch)
{ {
imp_flush_memcpy(_dst, _src, real_pitch); imp_flush_memcpy(_dst, _src, real_pitch);
_src += real_pitch; _src += real_pitch;

View file

@ -227,6 +227,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const auto color_offsets = get_offsets(); const auto color_offsets = get_offsets();
const auto color_locations = get_locations(); const auto color_locations = get_locations();
const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format);
const u8 depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4);
gl::command_context cmd{ gl_state }; gl::command_context cmd{ gl_state };
for (int i = 0; i < rsx::limits::color_buffers_count; ++i) for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
@ -250,7 +253,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
color_targets[i] = rtt->id(); color_targets[i] = rtt->id();
rtt->set_rsx_pitch(layout.actual_color_pitch[i]); rtt->set_rsx_pitch(layout.actual_color_pitch[i]);
m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height }; m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height, color_bpp };
rtt->tile = find_tile(color_offsets[i], color_locations[i]); rtt->tile = find_tile(color_offsets[i], color_locations[i]);
rtt->write_aa_mode = layout.aa_mode; rtt->write_aa_mode = layout.aa_mode;
@ -279,7 +282,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
depth_stencil_target = ds->id(); depth_stencil_target = ds->id();
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch); std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch);
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height }; m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height, depth_bpp };
ds->write_aa_mode = layout.aa_mode; ds->write_aa_mode = layout.aa_mode;
m_gl_texture_cache.notify_surface_changed(layout.zeta_address); m_gl_texture_cache.notify_surface_changed(layout.zeta_address);
@ -380,7 +383,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{ {
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]); const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors);
if (g_cfg.video.write_color_buffers) if (g_cfg.video.write_color_buffers)
{ {
// Mark buffer regions as NO_ACCESS on Cell-visible side // Mark buffer regions as NO_ACCESS on Cell-visible side
@ -395,7 +398,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (m_depth_surface_info.address && m_depth_surface_info.pitch) if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{ {
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
if (g_cfg.video.write_depth_buffer) if (g_cfg.video.write_depth_buffer)
{ {
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);

View file

@ -2897,8 +2897,10 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
layout.color_addresses, layout.zeta_address, layout.color_addresses, layout.zeta_address,
(*m_device), &*m_current_command_buffer); (*m_device), &*m_current_command_buffer);
//Reset framebuffer information // Reset framebuffer information
VkFormat old_format = VK_FORMAT_UNDEFINED; VkFormat old_format = VK_FORMAT_UNDEFINED;
const auto color_bpp = get_format_block_size_in_bytes(layout.color_format);
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{ {
//Flush old address if we keep missing it //Flush old address if we keep missing it
@ -2916,6 +2918,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_surface_info[i].width = layout.width; m_surface_info[i].width = layout.width;
m_surface_info[i].height = layout.height; m_surface_info[i].height = layout.height;
m_surface_info[i].color_format = layout.color_format; m_surface_info[i].color_format = layout.color_format;
m_surface_info[i].bpp = color_bpp;
} }
//Process depth surface as well //Process depth surface as well
@ -2932,6 +2935,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_depth_surface_info.width = layout.width; m_depth_surface_info.width = layout.width;
m_depth_surface_info.height = layout.height; m_depth_surface_info.height = layout.height;
m_depth_surface_info.depth_format = layout.depth_format; m_depth_surface_info.depth_format = layout.depth_format;
m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4);
} }
//Bind created rtts as current fbo... //Bind created rtts as current fbo...
@ -2975,7 +2979,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{ {
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]); const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors);
if (g_cfg.video.write_color_buffers) if (g_cfg.video.write_color_buffers)
{ {
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
@ -2989,7 +2993,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (m_depth_surface_info.address && m_depth_surface_info.pitch) if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{ {
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
if (g_cfg.video.write_depth_buffer) if (g_cfg.video.write_depth_buffer)
{ {
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;

View file

@ -48,27 +48,40 @@ namespace rsx
u32 address = 0; u32 address = 0;
u32 pitch = 0; u32 pitch = 0;
bool is_depth_surface; bool is_depth_surface = false;
rsx::surface_color_format color_format; rsx::surface_color_format color_format;
rsx::surface_depth_format depth_format; rsx::surface_depth_format depth_format;
u16 width; u16 width = 0;
u16 height; u16 height = 0;
u8 bpp = 0;
gcm_framebuffer_info() address_range range{};
{
address = 0;
pitch = 0;
}
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h) gcm_framebuffer_info() {}
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h, const u8 bpp_)
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h), bpp(bpp_)
{} {}
address_range get_memory_range(u32 aa_factor = 1) const void calculate_memory_range(u32 aa_factor_u, u32 aa_factor_v)
{ {
return address_range::start_length(address, pitch * height * aa_factor); // Account for the last line of the block not reaching the end
const u32 block_size = pitch * (height - 1) * aa_factor_v;
const u32 line_size = width * aa_factor_u * bpp;
range = address_range::start_length(address, block_size + line_size);
}
address_range get_memory_range(const u32* aa_factors)
{
calculate_memory_range(aa_factors[0], aa_factors[1]);
return range;
}
address_range get_memory_range() const
{
return range;
} }
}; };