mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-12 01:38:37 +12:00
rsx/texture_cache: Improve framebuffer memory locking when WCB/WDB is not enabled
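- Adds a new invalidation cause (committed_as_fbo) that removes non-framebuffer sections lying inside the framebuffer's memory range, while preserving other overlapping framebuffer sections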
This commit is contained in:
parent
563e205a72
commit
10dc3dadee
5 changed files with 59 additions and 35 deletions
|
@ -535,7 +535,7 @@ namespace rsx
|
|||
|
||||
// Sanity checks
|
||||
AUDIT(exclusion_range.is_page_range());
|
||||
AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause == invalidation_cause::superseded_by_fbo || !exclusion_range.overlaps(data.fault_range));
|
||||
AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause.skip_fbos() || !exclusion_range.overlaps(data.fault_range));
|
||||
|
||||
// Apply exclusion
|
||||
ranges_to_unprotect.exclude(exclusion_range);
|
||||
|
@ -792,7 +792,7 @@ namespace rsx
|
|||
// Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
|
||||
(invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !overlaps_fault_range) ||
|
||||
// HACK: When being superseded by an fbo, we preserve other overlapped fbos unless the start addresses match
|
||||
(overlaps_fault_range && cause == invalidation_cause::superseded_by_fbo && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start)
|
||||
(overlaps_fault_range && cause.skip_fbos() && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start)
|
||||
)
|
||||
{
|
||||
// False positive
|
||||
|
@ -874,7 +874,7 @@ namespace rsx
|
|||
else
|
||||
{
|
||||
// This is a read and all overlapping sections were RO and were excluded (except for cause == superseded_by_fbo)
|
||||
AUDIT(cause == invalidation_cause::superseded_by_fbo || cause.is_read() && !result.sections_to_exclude.empty());
|
||||
AUDIT(cause.skip_fbos() || cause.is_read() && !result.sections_to_exclude.empty());
|
||||
|
||||
// We did not handle this violation
|
||||
result.clear_sections();
|
||||
|
@ -1262,7 +1262,7 @@ namespace rsx
|
|||
return;
|
||||
|
||||
std::lock_guard lock(m_cache_mutex);
|
||||
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);
|
||||
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::committed_as_fbo, std::forward<Args>(extras)...);
|
||||
}
|
||||
|
||||
void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags)
|
||||
|
@ -1640,13 +1640,13 @@ namespace rsx
|
|||
if (limit_x > slice_w)
|
||||
{
|
||||
dst_width = (slice_w - dst_x);
|
||||
src_width = dst_width / scale_x;
|
||||
src_width = u16(dst_width / scale_x);
|
||||
}
|
||||
|
||||
if (limit_y > slice_h)
|
||||
{
|
||||
dst_height = (slice_h - dst_y);
|
||||
src_height = dst_height / scale_y;
|
||||
src_height = u16(dst_height / scale_y);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2060,7 +2060,7 @@ namespace rsx
|
|||
u16 internal_height = required_surface_height;
|
||||
get_native_dimensions(internal_width, internal_height, last.surface);
|
||||
|
||||
if (last.width == internal_width && last.height == internal_height)
|
||||
if (last.width >= internal_width && last.height >= internal_height)
|
||||
{
|
||||
verify(HERE), last.surface->test();
|
||||
return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth,
|
||||
|
|
|
@ -42,7 +42,8 @@ namespace rsx
|
|||
};
|
||||
|
||||
struct invalidation_cause {
|
||||
enum enum_type {
|
||||
enum enum_type
|
||||
{
|
||||
invalid = 0,
|
||||
read,
|
||||
deferred_read,
|
||||
|
@ -50,7 +51,8 @@ namespace rsx
|
|||
deferred_write,
|
||||
unmap, // fault range is being unmapped
|
||||
reprotect, // we are going to reprotect the fault range
|
||||
superseded_by_fbo // used by texture_cache::locked_memory_region
|
||||
superseded_by_fbo, // used by texture_cache::locked_memory_region
|
||||
committed_as_fbo // same as superseded_by_fbo but without locking or preserving page flags
|
||||
} cause;
|
||||
|
||||
constexpr bool valid() const
|
||||
|
@ -82,7 +84,13 @@ namespace rsx
|
|||
return (cause == unmap || cause == reprotect || cause == superseded_by_fbo);
|
||||
}
|
||||
|
||||
bool skip_flush() const
|
||||
constexpr bool skip_fbos() const
|
||||
{
|
||||
AUDIT(valid());
|
||||
return (cause == superseded_by_fbo || cause == committed_as_fbo);
|
||||
}
|
||||
|
||||
constexpr bool skip_flush() const
|
||||
{
|
||||
AUDIT(valid());
|
||||
return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo);
|
||||
|
@ -1401,20 +1409,16 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
|
||||
ASSERT(valid_length % rsx_pitch == 0);
|
||||
|
||||
u8 *_src = src;
|
||||
u32 _dst = dst;
|
||||
const auto num_rows = valid_length / rsx_pitch;
|
||||
|
||||
const auto num_exclusions = flush_exclusions.size();
|
||||
if (num_exclusions > 0)
|
||||
{
|
||||
LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d rows), performance might suffer", num_exclusions, num_rows);
|
||||
LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d bytes), performance might suffer", num_exclusions, valid_length);
|
||||
}
|
||||
|
||||
for (u32 row = 0; row < num_rows; ++row)
|
||||
for (s32 remaining = s32(valid_length); remaining > 0; remaining -= rsx_pitch)
|
||||
{
|
||||
imp_flush_memcpy(_dst, _src, real_pitch);
|
||||
_src += real_pitch;
|
||||
|
|
|
@ -227,6 +227,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
|||
const auto color_offsets = get_offsets();
|
||||
const auto color_locations = get_locations();
|
||||
|
||||
const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format);
|
||||
const u8 depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4);
|
||||
|
||||
gl::command_context cmd{ gl_state };
|
||||
|
||||
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
|
||||
|
@ -250,7 +253,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
|||
color_targets[i] = rtt->id();
|
||||
|
||||
rtt->set_rsx_pitch(layout.actual_color_pitch[i]);
|
||||
m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height };
|
||||
m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height, color_bpp };
|
||||
|
||||
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
|
||||
rtt->write_aa_mode = layout.aa_mode;
|
||||
|
@ -279,7 +282,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
|||
depth_stencil_target = ds->id();
|
||||
|
||||
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch);
|
||||
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height };
|
||||
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height, depth_bpp };
|
||||
|
||||
ds->write_aa_mode = layout.aa_mode;
|
||||
m_gl_texture_cache.notify_surface_changed(layout.zeta_address);
|
||||
|
@ -380,7 +383,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
|||
{
|
||||
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
|
||||
|
||||
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
|
||||
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors);
|
||||
if (g_cfg.video.write_color_buffers)
|
||||
{
|
||||
// Mark buffer regions as NO_ACCESS on Cell-visible side
|
||||
|
@ -395,7 +398,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
|||
|
||||
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
||||
{
|
||||
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
|
||||
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
|
||||
if (g_cfg.video.write_depth_buffer)
|
||||
{
|
||||
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
|
||||
|
|
|
@ -2897,8 +2897,10 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
|||
layout.color_addresses, layout.zeta_address,
|
||||
(*m_device), &*m_current_command_buffer);
|
||||
|
||||
//Reset framebuffer information
|
||||
// Reset framebuffer information
|
||||
VkFormat old_format = VK_FORMAT_UNDEFINED;
|
||||
const auto color_bpp = get_format_block_size_in_bytes(layout.color_format);
|
||||
|
||||
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
|
||||
{
|
||||
//Flush old address if we keep missing it
|
||||
|
@ -2916,6 +2918,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
|||
m_surface_info[i].width = layout.width;
|
||||
m_surface_info[i].height = layout.height;
|
||||
m_surface_info[i].color_format = layout.color_format;
|
||||
m_surface_info[i].bpp = color_bpp;
|
||||
}
|
||||
|
||||
//Process depth surface as well
|
||||
|
@ -2932,6 +2935,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
|||
m_depth_surface_info.width = layout.width;
|
||||
m_depth_surface_info.height = layout.height;
|
||||
m_depth_surface_info.depth_format = layout.depth_format;
|
||||
m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4);
|
||||
}
|
||||
|
||||
//Bind created rtts as current fbo...
|
||||
|
@ -2975,7 +2979,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
|||
{
|
||||
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
||||
|
||||
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
|
||||
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors);
|
||||
if (g_cfg.video.write_color_buffers)
|
||||
{
|
||||
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
|
||||
|
@ -2989,7 +2993,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
|||
|
||||
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
||||
{
|
||||
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
|
||||
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
|
||||
if (g_cfg.video.write_depth_buffer)
|
||||
{
|
||||
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
||||
|
|
|
@ -48,27 +48,40 @@ namespace rsx
|
|||
u32 address = 0;
|
||||
u32 pitch = 0;
|
||||
|
||||
bool is_depth_surface;
|
||||
bool is_depth_surface = false;
|
||||
|
||||
rsx::surface_color_format color_format;
|
||||
rsx::surface_depth_format depth_format;
|
||||
|
||||
u16 width;
|
||||
u16 height;
|
||||
u16 width = 0;
|
||||
u16 height = 0;
|
||||
u8 bpp = 0;
|
||||
|
||||
gcm_framebuffer_info()
|
||||
{
|
||||
address = 0;
|
||||
pitch = 0;
|
||||
}
|
||||
address_range range{};
|
||||
|
||||
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
|
||||
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
|
||||
gcm_framebuffer_info() {}
|
||||
|
||||
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h, const u8 bpp_)
|
||||
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h), bpp(bpp_)
|
||||
{}
|
||||
|
||||
address_range get_memory_range(u32 aa_factor = 1) const
|
||||
void calculate_memory_range(u32 aa_factor_u, u32 aa_factor_v)
|
||||
{
|
||||
return address_range::start_length(address, pitch * height * aa_factor);
|
||||
// Account for the last line of the block not reaching the end
|
||||
const u32 block_size = pitch * (height - 1) * aa_factor_v;
|
||||
const u32 line_size = width * aa_factor_u * bpp;
|
||||
range = address_range::start_length(address, block_size + line_size);
|
||||
}
|
||||
|
||||
address_range get_memory_range(const u32* aa_factors)
|
||||
{
|
||||
calculate_memory_range(aa_factors[0], aa_factors[1]);
|
||||
return range;
|
||||
}
|
||||
|
||||
address_range get_memory_range() const
|
||||
{
|
||||
return range;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue