From 1943d9819fdf04a4c7319547a6ed351261736b45 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 10 Mar 2022 00:06:03 +0300 Subject: [PATCH] rsx: Clean up surface cache routines around RTT invalidate --- rpcs3/Emu/RSX/Common/surface_store.h | 64 ++++++++++++++-------------- rpcs3/Emu/RSX/Common/surface_utils.h | 7 ++- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 7 ++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 4 +- 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index d46d35d182..cabce9bd3f 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -1,6 +1,7 @@ #pragma once #include "surface_utils.h" +#include "simple_array.hpp" #include "../gcm_enums.h" #include "../rsx_utils.h" #include @@ -56,10 +57,9 @@ namespace rsx rsx::surface_raster_type m_active_raster_type = rsx::surface_raster_type::linear; public: - std::pair m_bound_render_targets_config = {}; + rsx::simple_array m_bound_render_target_ids = {}; std::array, 4> m_bound_render_targets = {}; std::pair m_bound_depth_stencil = {}; - u8 m_bound_buffers_count = 0; // List of sections derived from a section that has been split and invalidated std::vector orphaned_surfaces; @@ -840,23 +840,19 @@ namespace rsx cache_tag = rsx::get_shared_tag(); m_invalidate_on_write = (antialias != rsx::surface_antialiasing::center_1_sample); m_active_raster_type = raster_type; - m_bound_buffers_count = 0; // Make previous RTTs sampleable - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) + for (const auto& i : m_bound_render_target_ids) { auto &rtt = m_bound_render_targets[i]; Traits::prepare_surface_for_sampling(command_list, std::get<1>(rtt)); rtt = std::make_pair(0, nullptr); } - const auto rtt_indices = utility::get_rtt_indexes(set_surface_target); - if (!rtt_indices.empty()) [[likely]] + m_bound_render_target_ids.clear(); + if (const auto rtt_indices = utility::get_rtt_indexes(set_surface_target); + !rtt_indices.empty()) [[likely]] { - m_bound_render_targets_config = { rtt_indices.front(), 0 }; - // Create/Reuse requested rtts for (u8 surface_index : rtt_indices) { @@ -867,14 +863,9 @@ namespace rsx bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, clip_width, clip_height, surface_pitch[surface_index], std::forward(extra_params)...)); - ++m_bound_render_targets_config.second; - ++m_bound_buffers_count; + m_bound_render_target_ids.push_back(surface_index); } } - else - { - m_bound_render_targets_config = { 0, 0 }; - } // Same for depth buffer if (std::get<1>(m_bound_depth_stencil) != nullptr) @@ -887,8 +878,6 @@ namespace rsx m_bound_depth_stencil = std::make_pair(address_z, bind_address_as_depth_stencil(command_list, address_z, depth_format, antialias, clip_width, clip_height, zeta_pitch, std::forward(extra_params)...)); - - ++m_bound_buffers_count; } else { @@ -898,7 +887,7 @@ namespace rsx u8 get_color_surface_count() const { - return m_bound_render_targets_config.second; + return static_cast(m_bound_render_target_ids.size()); } surface_type get_surface_at(u32 address) @@ -947,14 +936,15 @@ namespace rsx } } - bool address_is_bound(u32 address) const + inline bool address_is_bound(u32 address) const { - for (int i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++i, ++count) + ensure(address); + for (int i = 0; i < 4; ++i) { if (m_bound_render_targets[i].first == address) + { return true; + } } return (m_bound_depth_stencil.first == address); @@ -1127,21 +1117,33 @@ namespace rsx // TODO: Take WCB/WDB into account. Should speed this up a bit by skipping sync_tag calls write_tag = rsx::get_shared_tag(); - for (u8 i = m_bound_render_targets_config.first, count = 0; - count < m_bound_render_targets_config.second; - ++count, ++i) + for (const auto& i : m_bound_render_target_ids) { - if (auto surface = m_bound_render_targets[i].second; - surface && color_mrt_writes_enabled[i]) + if (color_mrt_writes_enabled[i]) { - surface->on_write(write_tag); + auto surface = m_bound_render_targets[i].second; + if (surface->last_use_tag > cache_tag) [[ likely ]] + { + surface->on_write_fast(write_tag); + } + else + { + surface->on_write(write_tag, rsx::surface_state_flags::require_resolve, m_active_raster_type); + } } } if (auto zsurface = m_bound_depth_stencil.second; zsurface && depth_stencil_writes_enabled) { - zsurface->on_write(write_tag); + if (zsurface->last_use_tag > cache_tag) [[ likely ]] + { + zsurface->on_write_fast(write_tag); + } + else + { + zsurface->on_write(write_tag, rsx::surface_state_flags::require_resolve, m_active_raster_type); + } } } @@ -1164,7 +1166,7 @@ namespace rsx ensure(m_active_memory_used == 0); m_bound_depth_stencil = std::make_pair(0, nullptr); - m_bound_render_targets_config = { 0, 0 }; + m_bound_render_target_ids.clear(); for (auto &rtt : m_bound_render_targets) { rtt = std::make_pair(0, nullptr); diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index d4c769c4e1..e4de529d86 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -623,11 +623,14 @@ namespace rsx } } - void on_invalidate_children() + inline void on_write_fast(u64 write_tag) { + ensure(write_tag); + last_use_tag = write_tag; + if (resolve_surface) { - msaa_flags = rsx::surface_state_flags::require_resolve; + msaa_flags |= rsx::surface_state_flags::require_resolve; } } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index e96e164cde..3ef9e4e833 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -644,16 +644,15 @@ void GLGSRender::clear_surface(u32 arg) gl_state.clear_color(clear_r, clear_g, clear_b, clear_a); mask |= GLenum(gl::buffers::color); - for (u8 index = m_rtts.m_bound_render_targets_config.first, count = 0; - count < m_rtts.m_bound_render_targets_config.second; - ++count, ++index) + int hw_index = 0; + for (const auto& index : m_rtts.m_bound_render_target_ids) { if (!full_frame) { m_rtts.m_bound_render_targets[index].second->write_barrier(cmd); } - gl_state.color_maski(count, colormask); + gl_state.color_maski(hw_index++, colormask); } update_color = true; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8bf95d7ec4..2b9d5de8cf 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1382,9 +1382,7 @@ void VKGSRender::clear_surface(u32 mask) if (!use_fast_clear || !full_frame) { // If we're not clobber all the memory, a barrier is required - for (u8 index = m_rtts.m_bound_render_targets_config.first, count = 0; - count < m_rtts.m_bound_render_targets_config.second; - ++count, ++index) + for (const auto& index : m_rtts.m_bound_render_target_ids) { m_rtts.m_bound_render_targets[index].second->write_barrier(*m_current_command_buffer); }