From 10dc3dadee6f1a04648601d862f40a0e69c0cdb4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 27 Feb 2019 21:26:22 +0300 Subject: [PATCH] rsx/texture_cache: Improve framebuffer memory locking when WCB/WDB is not enabled - Adds a new mode that removes non-framebuffer stuff inside framebuffer range --- rpcs3/Emu/RSX/Common/texture_cache.h | 14 ++++---- rpcs3/Emu/RSX/Common/texture_cache_utils.h | 22 +++++++------ rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 11 ++++--- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 10 ++++-- rpcs3/Emu/RSX/rsx_utils.h | 37 +++++++++++++++------- 5 files changed, 59 insertions(+), 35 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 5cefb0ef5f..d22ddac257 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -535,7 +535,7 @@ namespace rsx // Sanity checks AUDIT(exclusion_range.is_page_range()); - AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause == invalidation_cause::superseded_by_fbo || !exclusion_range.overlaps(data.fault_range)); + AUDIT(data.cause.is_read() && !excluded->is_flushable() || data.cause.skip_fbos() || !exclusion_range.overlaps(data.fault_range)); // Apply exclusion ranges_to_unprotect.exclude(exclusion_range); @@ -792,7 +792,7 @@ namespace rsx // Unsynchronized sections (or any flushable when skipping flushes) that do not overlap the fault range directly can also be ignored (invalidation_ignore_unsynchronized && tex.is_flushable() && (cause.skip_flush() || !tex.is_synchronized()) && !overlaps_fault_range) || // HACK: When being superseded by an fbo, we preserve other overlapped fbos unless the start addresses match - (overlaps_fault_range && cause == invalidation_cause::superseded_by_fbo && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start) + (overlaps_fault_range && cause.skip_fbos() && tex.get_context() == texture_upload_context::framebuffer_storage && tex.get_section_base() != fault_range_in.start) ) { // False positive @@ -874,7 +874,7 @@ namespace rsx else { // This is a read and all overlapping sections were RO and were excluded (except for cause == superseded_by_fbo) - AUDIT(cause == invalidation_cause::superseded_by_fbo || cause.is_read() && !result.sections_to_exclude.empty()); + AUDIT(cause.skip_fbos() || cause.is_read() && !result.sections_to_exclude.empty()); // We did not handle this violation result.clear_sections(); @@ -1262,7 +1262,7 @@ namespace rsx return; std::lock_guard lock(m_cache_mutex); - invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward(extras)...); + invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::committed_as_fbo, std::forward(extras)...); } void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags) @@ -1640,13 +1640,13 @@ namespace rsx if (limit_x > slice_w) { dst_width = (slice_w - dst_x); - src_width = dst_width / scale_x; + src_width = u16(dst_width / scale_x); } if (limit_y > slice_h) { dst_height = (slice_h - dst_y); - src_height = dst_height / scale_y; + src_height = u16(dst_height / scale_y); } } @@ -2060,7 +2060,7 @@ namespace rsx u16 internal_height = required_surface_height; get_native_dimensions(internal_width, internal_height, last.surface); - if (last.width == internal_width && last.height == internal_height) + if (last.width >= internal_width && last.height >= internal_height) { verify(HERE), last.surface->test(); return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth, diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index 3393b23c94..fe773b5251 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -42,7 +42,8 @@ namespace rsx }; struct invalidation_cause { - enum enum_type { + enum enum_type + { invalid = 0, read, deferred_read, @@ -50,7 +51,8 @@ namespace rsx deferred_write, unmap, // fault range is being unmapped reprotect, // we are going to reprotect the fault range - superseded_by_fbo // used by texture_cache::locked_memory_region + superseded_by_fbo, // used by texture_cache::locked_memory_region + committed_as_fbo // same as superseded_by_fbo but without locking or preserving page flags } cause; constexpr bool valid() const @@ -82,7 +84,13 @@ namespace rsx return (cause == unmap || cause == reprotect || cause == superseded_by_fbo); } - bool skip_flush() const + constexpr bool skip_fbos() const + { + AUDIT(valid()); + return (cause == superseded_by_fbo || cause == committed_as_fbo); + } + + constexpr bool skip_flush() const { AUDIT(valid()); return (cause == unmap) || (!g_cfg.video.strict_texture_flushing && cause == superseded_by_fbo); @@ -1401,20 +1409,16 @@ namespace rsx } else { - - ASSERT(valid_length % rsx_pitch == 0); - u8 *_src = src; u32 _dst = dst; - const auto num_rows = valid_length / rsx_pitch; const auto num_exclusions = flush_exclusions.size(); if (num_exclusions > 0) { - LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d rows), performance might suffer", num_exclusions, num_rows); + LOG_WARNING(RSX, "Slow imp_flush path triggered with non-empty flush_exclusions (%d exclusions, %d bytes), performance might suffer", num_exclusions, valid_length); } - for (u32 row = 0; row < num_rows; ++row) + for (s32 remaining = s32(valid_length); remaining > 0; remaining -= rsx_pitch) { imp_flush_memcpy(_dst, _src, real_pitch); _src += real_pitch; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 48ea2a03a4..40ecdad460 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -227,6 +227,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const auto color_offsets = get_offsets(); const auto color_locations = get_locations(); + const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format); + const u8 depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4); + gl::command_context cmd{ gl_state }; for (int i = 0; i < rsx::limits::color_buffers_count; ++i) @@ -250,7 +253,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk color_targets[i] = rtt->id(); rtt->set_rsx_pitch(layout.actual_color_pitch[i]); - m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height }; + m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height, color_bpp }; rtt->tile = find_tile(color_offsets[i], color_locations[i]); rtt->write_aa_mode = layout.aa_mode; @@ -279,7 +282,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk depth_stencil_target = ds->id(); std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch); - m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height }; + m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height, depth_bpp }; ds->write_aa_mode = layout.aa_mode; m_gl_texture_cache.notify_surface_changed(layout.zeta_address); @@ -380,7 +383,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]); + const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors); if (g_cfg.video.write_color_buffers) { // Mark buffer regions as NO_ACCESS on Cell-visible side @@ -395,7 +398,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk if (m_depth_surface_info.address && m_depth_surface_info.pitch) { - const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); + const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors); if (g_cfg.video.write_depth_buffer) { const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index dd9fca8969..16ea6240c1 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2897,8 +2897,10 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) layout.color_addresses, layout.zeta_address, (*m_device), &*m_current_command_buffer); - //Reset framebuffer information + // Reset framebuffer information VkFormat old_format = VK_FORMAT_UNDEFINED; + const auto color_bpp = get_format_block_size_in_bytes(layout.color_format); + for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { //Flush old address if we keep missing it @@ -2916,6 +2918,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_surface_info[i].width = layout.width; m_surface_info[i].height = layout.height; m_surface_info[i].color_format = layout.color_format; + m_surface_info[i].bpp = color_bpp; } //Process depth surface as well @@ -2932,6 +2935,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_depth_surface_info.width = layout.width; m_depth_surface_info.height = layout.height; m_depth_surface_info.depth_format = layout.depth_format; + m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4); } //Bind created rtts as current fbo... @@ -2975,7 +2979,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; - const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]); + const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors); if (g_cfg.video.write_color_buffers) { m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, @@ -2989,7 +2993,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (m_depth_surface_info.address && m_depth_surface_info.pitch) { - const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors); if (g_cfg.video.write_depth_buffer) { const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 8c5b0733e6..6b42e8fa78 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -48,27 +48,40 @@ namespace rsx u32 address = 0; u32 pitch = 0; - bool is_depth_surface; + bool is_depth_surface = false; rsx::surface_color_format color_format; rsx::surface_depth_format depth_format; - u16 width; - u16 height; + u16 width = 0; + u16 height = 0; + u8 bpp = 0; - gcm_framebuffer_info() - { - address = 0; - pitch = 0; - } + address_range range{}; - gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h) - :address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h) + gcm_framebuffer_info() {} + + gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h, const u8 bpp_) + :address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h), bpp(bpp_) {} - address_range get_memory_range(u32 aa_factor = 1) const + void calculate_memory_range(u32 aa_factor_u, u32 aa_factor_v) { - return address_range::start_length(address, pitch * height * aa_factor); + // Account for the last line of the block not reaching the end + const u32 block_size = pitch * (height - 1) * aa_factor_v; + const u32 line_size = width * aa_factor_u * bpp; + range = address_range::start_length(address, block_size + line_size); + } + + address_range get_memory_range(const u32* aa_factors) + { + calculate_memory_range(aa_factors[0], aa_factors[1]); + return range; + } + + address_range get_memory_range() const + { + return range; } };