rsx/texture_cache: Improve framebuffer memory locking when WCB/WDB is not enabled

- Adds a new mode that removes non-framebuffer stuff inside framebuffer range
This commit is contained in:
kd-11 2019-02-27 21:26:22 +03:00 committed by kd-11
parent 563e205a72
commit 10dc3dadee
5 changed files with 59 additions and 35 deletions

View file

@ -535,7 +535,7 @@ namespace rsx
// Sanity checks
AUDIT(exclusion_range.is_page_range());
AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause == invalidation_cause::superseded_by_fbo || !exclusion_range.overlaps(data.fault_range));
AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause.skip_fbos() || !exclusion_range.overlaps(data.fault_range));
// Apply exclusion
ranges_to_unprotect.exclude(exclusion_range);
@ -792,7 +792,7 @@ namespace rsx
// Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
(invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !overlaps_fault_range) ||
// HACK: When the cause skips fbos (superseded_by_fbo or committed_as_fbo), we preserve other overlapped fbos unless the start addresses match
(overlaps_fault_range && cause == invalidation_cause::superseded_by_fbo && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start)
(overlaps_fault_range && cause.skip_fbos() && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start)
)
{
// False positive
@ -874,7 +874,7 @@ namespace rsx
else
{
// This is a read and all overlapping sections were RO and were excluded (except when cause.skip_fbos() is true)
AUDIT(cause == invalidation_cause::superseded_by_fbo || cause.is_read() && !result.sections_to_exclude.empty());
AUDIT(cause.skip_fbos() || cause.is_read() && !result.sections_to_exclude.empty());
// We did not handle this violation
result.clear_sections();
@ -1262,7 +1262,7 @@ namespace rsx
return;
std::lock_guard lock(m_cache_mutex);
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::committed_as_fbo, std::forward<Args>(extras)...);
}
void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags)
@ -1640,13 +1640,13 @@ namespace rsx
if (limit_x > slice_w)
{
dst_width = (slice_w - dst_x);
src_width = dst_width / scale_x;
src_width = u16(dst_width / scale_x);
}
if (limit_y > slice_h)
{
dst_height = (slice_h - dst_y);
src_height = dst_height / scale_y;
src_height = u16(dst_height / scale_y);
}
}
@ -2060,7 +2060,7 @@ namespace rsx
u16 internal_height = required_surface_height;
get_native_dimensions(internal_width, internal_height, last.surface);
if (last.width == internal_width && last.height == internal_height)
if (last.width >= internal_width && last.height >= internal_height)
{
verify(HERE), last.surface->test();
return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth,

View file

@ -42,7 +42,8 @@ namespace rsx
};
struct invalidation_cause {
enum enum_type {
enum enum_type
{
invalid = 0,
read,
deferred_read,
@ -50,7 +51,8 @@ namespace rsx
deferred_write,
unmap, // fault range is being unmapped
reprotect, // we are going to reprotect the fault range
superseded_by_fbo // used by texture_cache::locked_memory_region
superseded_by_fbo, // used by texture_cache::locked_memory_region
committed_as_fbo // same as superseded_by_fbo but without locking or preserving page flags
} cause;
constexpr bool valid() const
@ -82,7 +84,13 @@ namespace rsx
return (cause == unmap || cause == reprotect || cause == superseded_by_fbo);
}
bool skip_flush() const
// Returns true when this cause is one of the two fbo-related causes,
// i.e. superseded_by_fbo or committed_as_fbo; false for every other cause.
constexpr bool skip_fbos() const
{
// Audit that the cause is valid before testing it
AUDIT(valid());
return (cause == superseded_by_fbo || cause == committed_as_fbo);
}
constexpr bool skip_flush() const
{
AUDIT(valid());
return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo);
@ -1401,20 +1409,16 @@ namespace rsx
}
else
{
ASSERT(valid_length % rsx_pitch == 0);
u8 *_src = src;
u32 _dst = dst;
const auto num_rows = valid_length / rsx_pitch;
const auto num_exclusions = flush_exclusions.size();
if (num_exclusions > 0)
{
LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d rows), performance might suffer", num_exclusions, num_rows);
LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d bytes), performance might suffer", num_exclusions, valid_length);
}
for (u32 row = 0; row < num_rows; ++row)
for (s32 remaining = s32(valid_length); remaining > 0; remaining -= rsx_pitch)
{
imp_flush_memcpy(_dst, _src, real_pitch);
_src += real_pitch;

View file

@ -227,6 +227,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const auto color_offsets = get_offsets();
const auto color_locations = get_locations();
const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format);
const u8 depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4);
gl::command_context cmd{ gl_state };
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
@ -250,7 +253,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
color_targets[i] = rtt->id();
rtt->set_rsx_pitch(layout.actual_color_pitch[i]);
m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height };
m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height, color_bpp };
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
rtt->write_aa_mode = layout.aa_mode;
@ -279,7 +282,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
depth_stencil_target = ds->id();
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch);
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height };
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height, depth_bpp };
ds->write_aa_mode = layout.aa_mode;
m_gl_texture_cache.notify_surface_changed(layout.zeta_address);
@ -380,7 +383,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors);
if (g_cfg.video.write_color_buffers)
{
// Mark buffer regions as NO_ACCESS on Cell-visible side
@ -395,7 +398,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
if (g_cfg.video.write_depth_buffer)
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);

View file

@ -2897,8 +2897,10 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
layout.color_addresses, layout.zeta_address,
(*m_device), &*m_current_command_buffer);
//Reset framebuffer information
// Reset framebuffer information
VkFormat old_format = VK_FORMAT_UNDEFINED;
const auto color_bpp = get_format_block_size_in_bytes(layout.color_format);
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
//Flush old address if we keep missing it
@ -2916,6 +2918,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_surface_info[i].width = layout.width;
m_surface_info[i].height = layout.height;
m_surface_info[i].color_format = layout.color_format;
m_surface_info[i].bpp = color_bpp;
}
//Process depth surface as well
@ -2932,6 +2935,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_depth_surface_info.width = layout.width;
m_depth_surface_info.height = layout.height;
m_depth_surface_info.depth_format = layout.depth_format;
m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4);
}
//Bind created rtts as current fbo...
@ -2975,7 +2979,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors);
if (g_cfg.video.write_color_buffers)
{
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
@ -2989,7 +2993,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
if (g_cfg.video.write_depth_buffer)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;

View file

@ -48,27 +48,40 @@ namespace rsx
u32 address = 0;
u32 pitch = 0;
bool is_depth_surface;
bool is_depth_surface = false;
rsx::surface_color_format color_format;
rsx::surface_depth_format depth_format;
u16 width;
u16 height;
u16 width = 0;
u16 height = 0;
u8 bpp = 0;
gcm_framebuffer_info()
{
address = 0;
pitch = 0;
}
address_range range{};
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
gcm_framebuffer_info() {}
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h, const u8 bpp_)
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h), bpp(bpp_)
{}
address_range get_memory_range(u32 aa_factor = 1) const
void calculate_memory_range(u32 aa_factor_u, u32 aa_factor_v)
{
return address_range::start_length(address, pitch * height * aa_factor);
// Account for the last line of the block not reaching the end
const u32 block_size = pitch * (height - 1) * aa_factor_v;
const u32 line_size = width * aa_factor_u * bpp;
range = address_range::start_length(address, block_size + line_size);
}
// Recomputes the stored range and returns it.
// aa_factors must point to at least two values: aa_factors[0] is forwarded as
// aa_factor_u and aa_factors[1] as aa_factor_v to calculate_memory_range().
// Not const, because calculate_memory_range() overwrites the 'range' member.
address_range get_memory_range(const u32* aa_factors)
{
calculate_memory_range(aa_factors[0], aa_factors[1]);
return range;
}
// Returns the value currently stored in 'range' without recomputing it.
// 'range' is default-initialized and is only updated by calculate_memory_range(),
// so the result reflects whatever was last stored there.
address_range get_memory_range() const
{
return range;
}
};