mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-12 17:58:37 +12:00
rsx/texture_cache: Improve framebuffer memory locking when WCB/WDB is not enabled
- Adds a new invalidation cause (committed_as_fbo) that removes non-framebuffer sections inside the framebuffer range
This commit is contained in:
parent
563e205a72
commit
10dc3dadee
5 changed files with 59 additions and 35 deletions
|
@ -535,7 +535,7 @@ namespace rsx
|
||||||
|
|
||||||
// Sanity checks
|
// Sanity checks
|
||||||
AUDIT(exclusion_range.is_page_range());
|
AUDIT(exclusion_range.is_page_range());
|
||||||
AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause == invalidation_cause::superseded_by_fbo || !exclusion_range.overlaps(data.fault_range));
|
AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause.skip_fbos() || !exclusion_range.overlaps(data.fault_range));
|
||||||
|
|
||||||
// Apply exclusion
|
// Apply exclusion
|
||||||
ranges_to_unprotect.exclude(exclusion_range);
|
ranges_to_unprotect.exclude(exclusion_range);
|
||||||
|
@ -792,7 +792,7 @@ namespace rsx
|
||||||
// Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
|
// Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored
|
||||||
(invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !overlaps_fault_range) ||
|
(invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !overlaps_fault_range) ||
|
||||||
// HACK: When being superseded by an fbo, we preserve other overlapped fbos unless the start addresses match
|
// HACK: When being superseded by an fbo, we preserve other overlapped fbos unless the start addresses match
|
||||||
(overlaps_fault_range && cause == invalidation_cause::superseded_by_fbo && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start)
|
(overlaps_fault_range && cause.skip_fbos() && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
// False positive
|
// False positive
|
||||||
|
@ -874,7 +874,7 @@ namespace rsx
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// This is a read and all overlapping sections were RO and were excluded (except for cause == superseded_by_fbo)
|
// This is a read and all overlapping sections were RO and were excluded (except when cause.skip_fbos() is true, i.e. superseded_by_fbo or committed_as_fbo)
|
||||||
AUDIT(cause == invalidation_cause::superseded_by_fbo || cause.is_read() && !result.sections_to_exclude.empty());
|
AUDIT(cause.skip_fbos() || cause.is_read() && !result.sections_to_exclude.empty());
|
||||||
|
|
||||||
// We did not handle this violation
|
// We did not handle this violation
|
||||||
result.clear_sections();
|
result.clear_sections();
|
||||||
|
@ -1262,7 +1262,7 @@ namespace rsx
|
||||||
return;
|
return;
|
||||||
|
|
||||||
std::lock_guard lock(m_cache_mutex);
|
std::lock_guard lock(m_cache_mutex);
|
||||||
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);
|
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::committed_as_fbo, std::forward<Args>(extras)...);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags)
|
void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags)
|
||||||
|
@ -1640,13 +1640,13 @@ namespace rsx
|
||||||
if (limit_x > slice_w)
|
if (limit_x > slice_w)
|
||||||
{
|
{
|
||||||
dst_width = (slice_w - dst_x);
|
dst_width = (slice_w - dst_x);
|
||||||
src_width = dst_width / scale_x;
|
src_width = u16(dst_width / scale_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (limit_y > slice_h)
|
if (limit_y > slice_h)
|
||||||
{
|
{
|
||||||
dst_height = (slice_h - dst_y);
|
dst_height = (slice_h - dst_y);
|
||||||
src_height = dst_height / scale_y;
|
src_height = u16(dst_height / scale_y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2060,7 +2060,7 @@ namespace rsx
|
||||||
u16 internal_height = required_surface_height;
|
u16 internal_height = required_surface_height;
|
||||||
get_native_dimensions(internal_width, internal_height, last.surface);
|
get_native_dimensions(internal_width, internal_height, last.surface);
|
||||||
|
|
||||||
if (last.width == internal_width && last.height == internal_height)
|
if (last.width >= internal_width && last.height >= internal_height)
|
||||||
{
|
{
|
||||||
verify(HERE), last.surface->test();
|
verify(HERE), last.surface->test();
|
||||||
return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth,
|
return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth,
|
||||||
|
|
|
@ -42,7 +42,8 @@ namespace rsx
|
||||||
};
|
};
|
||||||
|
|
||||||
struct invalidation_cause {
|
struct invalidation_cause {
|
||||||
enum enum_type {
|
enum enum_type
|
||||||
|
{
|
||||||
invalid = 0,
|
invalid = 0,
|
||||||
read,
|
read,
|
||||||
deferred_read,
|
deferred_read,
|
||||||
|
@ -50,7 +51,8 @@ namespace rsx
|
||||||
deferred_write,
|
deferred_write,
|
||||||
unmap, // fault range is being unmapped
|
unmap, // fault range is being unmapped
|
||||||
reprotect, // we are going to reprotect the fault range
|
reprotect, // we are going to reprotect the fault range
|
||||||
superseded_by_fbo // used by texture_cache::locked_memory_region
|
superseded_by_fbo, // used by texture_cache::locked_memory_region
|
||||||
|
committed_as_fbo // same as superseded_by_fbo but without locking or preserving page flags
|
||||||
} cause;
|
} cause;
|
||||||
|
|
||||||
constexpr bool valid() const
|
constexpr bool valid() const
|
||||||
|
@ -82,7 +84,13 @@ namespace rsx
|
||||||
return (cause == unmap || cause == reprotect || cause == superseded_by_fbo);
|
return (cause == unmap || cause == reprotect || cause == superseded_by_fbo);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool skip_flush() const
|
constexpr bool skip_fbos() const
|
||||||
|
{
|
||||||
|
AUDIT(valid());
|
||||||
|
return (cause == superseded_by_fbo || cause == committed_as_fbo);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr bool skip_flush() const
|
||||||
{
|
{
|
||||||
AUDIT(valid());
|
AUDIT(valid());
|
||||||
return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo);
|
return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo);
|
||||||
|
@ -1401,20 +1409,16 @@ namespace rsx
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
||||||
ASSERT(valid_length % rsx_pitch == 0);
|
|
||||||
|
|
||||||
u8 *_src = src;
|
u8 *_src = src;
|
||||||
u32 _dst = dst;
|
u32 _dst = dst;
|
||||||
const auto num_rows = valid_length / rsx_pitch;
|
|
||||||
|
|
||||||
const auto num_exclusions = flush_exclusions.size();
|
const auto num_exclusions = flush_exclusions.size();
|
||||||
if (num_exclusions > 0)
|
if (num_exclusions > 0)
|
||||||
{
|
{
|
||||||
LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d rows), performance might suffer", num_exclusions, num_rows);
|
LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d bytes), performance might suffer", num_exclusions, valid_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 row = 0; row < num_rows; ++row)
|
for (s32 remaining = s32(valid_length); remaining > 0; remaining -= rsx_pitch)
|
||||||
{
|
{
|
||||||
imp_flush_memcpy(_dst, _src, real_pitch);
|
imp_flush_memcpy(_dst, _src, real_pitch);
|
||||||
_src += real_pitch;
|
_src += real_pitch;
|
||||||
|
|
|
@ -227,6 +227,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
||||||
const auto color_offsets = get_offsets();
|
const auto color_offsets = get_offsets();
|
||||||
const auto color_locations = get_locations();
|
const auto color_locations = get_locations();
|
||||||
|
|
||||||
|
const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format);
|
||||||
|
const u8 depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4);
|
||||||
|
|
||||||
gl::command_context cmd{ gl_state };
|
gl::command_context cmd{ gl_state };
|
||||||
|
|
||||||
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
|
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
|
||||||
|
@ -250,7 +253,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
||||||
color_targets[i] = rtt->id();
|
color_targets[i] = rtt->id();
|
||||||
|
|
||||||
rtt->set_rsx_pitch(layout.actual_color_pitch[i]);
|
rtt->set_rsx_pitch(layout.actual_color_pitch[i]);
|
||||||
m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height };
|
m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height, color_bpp };
|
||||||
|
|
||||||
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
|
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
|
||||||
rtt->write_aa_mode = layout.aa_mode;
|
rtt->write_aa_mode = layout.aa_mode;
|
||||||
|
@ -279,7 +282,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
||||||
depth_stencil_target = ds->id();
|
depth_stencil_target = ds->id();
|
||||||
|
|
||||||
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch);
|
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch);
|
||||||
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height };
|
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height, depth_bpp };
|
||||||
|
|
||||||
ds->write_aa_mode = layout.aa_mode;
|
ds->write_aa_mode = layout.aa_mode;
|
||||||
m_gl_texture_cache.notify_surface_changed(layout.zeta_address);
|
m_gl_texture_cache.notify_surface_changed(layout.zeta_address);
|
||||||
|
@ -380,7 +383,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
||||||
{
|
{
|
||||||
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
|
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
|
||||||
|
|
||||||
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
|
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors);
|
||||||
if (g_cfg.video.write_color_buffers)
|
if (g_cfg.video.write_color_buffers)
|
||||||
{
|
{
|
||||||
// Mark buffer regions as NO_ACCESS on Cell-visible side
|
// Mark buffer regions as NO_ACCESS on Cell-visible side
|
||||||
|
@ -395,7 +398,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
|
||||||
|
|
||||||
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
||||||
{
|
{
|
||||||
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
|
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
|
||||||
if (g_cfg.video.write_depth_buffer)
|
if (g_cfg.video.write_depth_buffer)
|
||||||
{
|
{
|
||||||
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
|
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
|
||||||
|
|
|
@ -2897,8 +2897,10 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
layout.color_addresses, layout.zeta_address,
|
layout.color_addresses, layout.zeta_address,
|
||||||
(*m_device), &*m_current_command_buffer);
|
(*m_device), &*m_current_command_buffer);
|
||||||
|
|
||||||
//Reset framebuffer information
|
// Reset framebuffer information
|
||||||
VkFormat old_format = VK_FORMAT_UNDEFINED;
|
VkFormat old_format = VK_FORMAT_UNDEFINED;
|
||||||
|
const auto color_bpp = get_format_block_size_in_bytes(layout.color_format);
|
||||||
|
|
||||||
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
|
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
|
||||||
{
|
{
|
||||||
//Flush old address if we keep missing it
|
//Flush old address if we keep missing it
|
||||||
|
@ -2916,6 +2918,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
m_surface_info[i].width = layout.width;
|
m_surface_info[i].width = layout.width;
|
||||||
m_surface_info[i].height = layout.height;
|
m_surface_info[i].height = layout.height;
|
||||||
m_surface_info[i].color_format = layout.color_format;
|
m_surface_info[i].color_format = layout.color_format;
|
||||||
|
m_surface_info[i].bpp = color_bpp;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Process depth surface as well
|
//Process depth surface as well
|
||||||
|
@ -2932,6 +2935,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
m_depth_surface_info.width = layout.width;
|
m_depth_surface_info.width = layout.width;
|
||||||
m_depth_surface_info.height = layout.height;
|
m_depth_surface_info.height = layout.height;
|
||||||
m_depth_surface_info.depth_format = layout.depth_format;
|
m_depth_surface_info.depth_format = layout.depth_format;
|
||||||
|
m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Bind created rtts as current fbo...
|
//Bind created rtts as current fbo...
|
||||||
|
@ -2975,7 +2979,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
{
|
{
|
||||||
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
||||||
|
|
||||||
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
|
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors);
|
||||||
if (g_cfg.video.write_color_buffers)
|
if (g_cfg.video.write_color_buffers)
|
||||||
{
|
{
|
||||||
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
|
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
|
||||||
|
@ -2989,7 +2993,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
|
|
||||||
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
||||||
{
|
{
|
||||||
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
|
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
|
||||||
if (g_cfg.video.write_depth_buffer)
|
if (g_cfg.video.write_depth_buffer)
|
||||||
{
|
{
|
||||||
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
||||||
|
|
|
@ -48,27 +48,40 @@ namespace rsx
|
||||||
u32 address = 0;
|
u32 address = 0;
|
||||||
u32 pitch = 0;
|
u32 pitch = 0;
|
||||||
|
|
||||||
bool is_depth_surface;
|
bool is_depth_surface = false;
|
||||||
|
|
||||||
rsx::surface_color_format color_format;
|
rsx::surface_color_format color_format;
|
||||||
rsx::surface_depth_format depth_format;
|
rsx::surface_depth_format depth_format;
|
||||||
|
|
||||||
u16 width;
|
u16 width = 0;
|
||||||
u16 height;
|
u16 height = 0;
|
||||||
|
u8 bpp = 0;
|
||||||
|
|
||||||
gcm_framebuffer_info()
|
address_range range{};
|
||||||
{
|
|
||||||
address = 0;
|
|
||||||
pitch = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
|
gcm_framebuffer_info() {}
|
||||||
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
|
|
||||||
|
gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h, const u8 bpp_)
|
||||||
|
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h), bpp(bpp_)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
address_range get_memory_range(u32 aa_factor = 1) const
|
void calculate_memory_range(u32 aa_factor_u, u32 aa_factor_v)
|
||||||
{
|
{
|
||||||
return address_range::start_length(address, pitch * height * aa_factor);
|
// The last line of the surface only covers (width * aa_factor_u * bpp) bytes instead of a full pitch, so it is sized separately
|
||||||
|
const u32 block_size = pitch * (height - 1) * aa_factor_v;
|
||||||
|
const u32 line_size = width * aa_factor_u * bpp;
|
||||||
|
range = address_range::start_length(address, block_size + line_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
address_range get_memory_range(const u32* aa_factors)
|
||||||
|
{
|
||||||
|
calculate_memory_range(aa_factors[0], aa_factors[1]);
|
||||||
|
return range;
|
||||||
|
}
|
||||||
|
|
||||||
|
address_range get_memory_range() const
|
||||||
|
{
|
||||||
|
return range;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue