// Patch summary (git unified diff; line breaks inside the hunks below are collapsed).
// NOTE(review): template argument lists appear to have been stripped from this text
// (e.g. "std::pair m_bound_render_targets_config", "std::forward(extra_params)") - presumably
// an extraction artifact; confirm against the real commit before applying.
//
// rpcs3/Emu/RSX/Common/surface_store.h
//   * Adds m_bound_render_targets_config. It is set to { rtt_indices.front(), 0 } and its
//     .second is incremented once per render target that actually gets bound, so it describes
//     a window [first, first + second) into m_bound_render_targets.
//   * The "make previous RTTs sampleable" loop, address_is_bound() and the two tagging loops
//     now walk only that window instead of scanning all of m_bound_render_targets.
//   * The config is reset to { 0, 0 } when no RTT indices are requested and in the hunk that
//     also clears m_bound_depth_stencil and every m_bound_render_targets entry.
//   NOTE(review): the removed code guarded each entry (nullptr check before
//   prepare_rtt_for_sampling, zero-address check before ->on_write). The new loops call both
//   unconditionally. The binding loop still skips indices whose surface_addresses entry is 0
//   without advancing .second, so if a zero address can precede a non-zero one the window
//   would cover an unbound (nullptr) slot - confirm callers never produce such a layout.
//
// rpcs3/Emu/RSX/Common/texture_cache.h
//   * Compressed formats now take a dedicated branch that calls find_texture_from_dimensions();
//     the previous range-based lookup (find_texture_from_range + matches()) moves into the
//     else branch together with the blit-engine / surface-store merge path.
//   NOTE(review): inside the new else branch is_compressed_format appears to be always false,
//   so the "(is_compressed_format) ? ... : ..." initialiser of lookup_mask can only take its
//   second arm - consider simplifying in a follow-up.
//
// rpcs3/Emu/RSX/rsx_methods.cpp
//   * Removes the #pragma optimize("", off) placed before nv3089::image_in and the matching
//     #pragma optimize("", on) later in the same namespace.
diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index a9aa8235f2..90489e5b1b 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -434,6 +434,7 @@ namespace rsx rsx::address_range m_depth_stencil_memory_range; public: + std::pair m_bound_render_targets_config = {}; std::array, 4> m_bound_render_targets = {}; std::pair m_bound_depth_stencil = {}; @@ -1000,22 +1001,36 @@ namespace rsx cache_tag = rsx::get_shared_tag(); // Make previous RTTs sampleable - for (auto &rtt : m_bound_render_targets) + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) { - if (std::get<1>(rtt) != nullptr) - Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt)); + auto &rtt = m_bound_render_targets[i]; + Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt)); rtt = std::make_pair(0, nullptr); } - // Create/Reuse requested rtts - for (u8 surface_index : utility::get_rtt_indexes(set_surface_target)) + const auto rtt_indices = utility::get_rtt_indexes(set_surface_target); + if (LIKELY(!rtt_indices.empty())) { - if (surface_addresses[surface_index] == 0) - continue; + m_bound_render_targets_config = { rtt_indices.front(), 0 }; - m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index], - bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, - clip_width, clip_height, surface_pitch[surface_index], std::forward(extra_params)...)); + // Create/Reuse requested rtts + for (u8 surface_index : rtt_indices) + { + if (surface_addresses[surface_index] == 0) + continue; + + m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index], + bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, + clip_width, clip_height, surface_pitch[surface_index], 
std::forward(extra_params)...)); + + m_bound_render_targets_config.second++; + } + } + else + { + m_bound_render_targets_config = { 0, 0 }; } // Same for depth buffer @@ -1288,17 +1303,15 @@ namespace rsx bool address_is_bound(u32 address) const { - for (auto &surface : m_bound_render_targets) + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) { - const u32 bound_address = std::get<0>(surface); - if (bound_address == address) + if (m_bound_render_targets[i].first == address) return true; } - if (std::get<0>(m_bound_depth_stencil) == address) - return true; - - return false; + return (m_bound_depth_stencil.first == address); } template @@ -1460,17 +1473,10 @@ namespace rsx } // Tag all available surfaces - for (int i = 0; i < m_bound_render_targets.size(); ++i) + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) { - // Usually only 1 or 2 buffers are bound anyway - if (LIKELY(!m_bound_render_targets[i].first)) - { - if (i) break; - - // B-surface binding - continue; - } - m_bound_render_targets[i].second->on_write(write_tag); } @@ -1481,14 +1487,10 @@ namespace rsx } else { - for (int i = 0; i < m_bound_render_targets.size(); ++i) + for (int i = m_bound_render_targets_config.first, count = 0; + count < m_bound_render_targets_config.second; + ++i, ++count) { - if (LIKELY(!m_bound_render_targets[i].first)) - { - if (i) break; - continue; - } - if (m_bound_render_targets[i].first != address) { continue; @@ -1527,6 +1529,7 @@ namespace rsx free_resource_list(m_depth_stencil_storage); m_bound_depth_stencil = std::make_pair(0, nullptr); + m_bound_render_targets_config = { 0, 0 }; for (auto &rtt : m_bound_render_targets) { rtt = std::make_pair(0, nullptr); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 0af7d1dd9c..815b56cb2d 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ 
b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2135,35 +2135,43 @@ namespace rsx } } - // Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache - const u32 lookup_mask = (is_compressed_format) ? rsx::texture_upload_context::shader_read : - rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src; - - auto lookup_range = tex_range; - if (LIKELY(extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d)) - { - // Optimize the range a bit by only searching for mip0, layer0 to avoid false positives - const auto texel_rows_per_line = get_format_texel_rows_per_line(format); - const auto num_rows = (tex_height + texel_rows_per_line - 1) / texel_rows_per_line; - if (const auto length = u32(num_rows * tex_pitch); length < tex_range.length()) - { - lookup_range = utils::address_range::start_length(texaddr, length); - } - } - reader_lock lock(m_cache_mutex); - const auto overlapping_locals = find_texture_from_range(lookup_range, tex_height > 1? tex_pitch : 0, lookup_mask); - for (auto& cached_texture : overlapping_locals) + if (LIKELY(is_compressed_format)) { - if (cached_texture->matches(texaddr, format, tex_width, tex_height, depth, 0)) + // Most mesh textures are stored as compressed to make the most of the limited memory + if (auto cached_texture = find_texture_from_dimensions(texaddr, format, tex_width, tex_height, depth)) { return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() }; } } - - if (!is_compressed_format) + else { + // Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache + const u32 lookup_mask = (is_compressed_format) ? 
rsx::texture_upload_context::shader_read : + rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src; + + auto lookup_range = tex_range; + if (LIKELY(extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d)) + { + // Optimize the range a bit by only searching for mip0, layer0 to avoid false positives + const auto texel_rows_per_line = get_format_texel_rows_per_line(format); + const auto num_rows = (tex_height + texel_rows_per_line - 1) / texel_rows_per_line; + if (const auto length = u32(num_rows * tex_pitch); length < tex_range.length()) + { + lookup_range = utils::address_range::start_length(texaddr, length); + } + } + + const auto overlapping_locals = find_texture_from_range(lookup_range, tex_height > 1? tex_pitch : 0, lookup_mask); + for (auto& cached_texture : overlapping_locals) + { + if (cached_texture->matches(texaddr, format, tex_width, tex_height, depth, 0)) + { + return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() }; + } + } + // Next, attempt to merge blit engine and surface store // Blit sources contain info from any shader-read stuff in range // NOTE: Compressed formats require a reupload, facilitated by blit synchronization and/or WCB and are not handled here diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index f21df7cf87..8f10a593be 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -810,7 +810,6 @@ namespace rsx namespace nv3089 { -#pragma optimize("", off) void image_in(thread *rsx, u32 _reg, u32 arg) { const rsx::blit_engine::transfer_operation operation = method_registers.blit_engine_operation(); @@ -1191,7 +1190,6 @@ namespace rsx std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height); } } -#pragma optimize("", on) } namespace nv0039