diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 09273fda31..c671dae9f1 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -35,6 +35,20 @@ namespace rsx {} }; + template + struct surface_overlap_info_t + { + surface_type surface = nullptr; + bool is_depth = false; + + u16 src_x = 0; + u16 src_y = 0; + u16 dst_x = 0; + u16 dst_y = 0; + u16 width = 0; + u16 height = 0; + }; + struct surface_format_info { u32 surface_width; @@ -120,6 +134,7 @@ namespace rsx using command_list_type = typename Traits::command_list_type; using download_buffer_object = typename Traits::download_buffer_object; using surface_subresource = surface_subresource_storage; + using surface_overlap_info = surface_overlap_info_t; std::unordered_map m_render_targets_storage = {}; std::unordered_map m_depth_stencil_storage = {}; @@ -846,5 +861,61 @@ namespace rsx return{}; } + + std::vector get_merged_texture_memory_region(u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp) + { + std::vector result; + const u32 limit = texaddr + (required_pitch * required_height); + + auto process_list_function = [&](std::unordered_map& data, bool is_depth) + { + for (auto &tex_info : data) + { + auto this_address = std::get<0>(tex_info); + if (this_address > limit) + continue; + + auto surface = std::get<1>(tex_info).get(); + const auto pitch = surface->get_rsx_pitch(); + if (pitch != required_pitch) + continue; + + const auto texture_size = pitch * surface->get_surface_height(); + if ((this_address + texture_size) <= texaddr) + continue; + + surface_overlap_info info; + info.surface = surface; + info.is_depth = is_depth; + + if (this_address < texaddr) + { + auto offset = texaddr - this_address; + info.src_y = (offset / required_pitch); + info.src_x = (offset % required_pitch) / bpp; + info.dst_x = 0; + info.dst_y = 0; + info.width = std::min(required_width, surface->get_surface_width() - info.src_x); + info.height = std::min(required_height, surface->get_surface_height() - info.src_y); + } + else + { + auto offset = this_address - texaddr; + info.src_x = 0; + info.src_y = 0; + info.dst_y = (offset / required_pitch); + info.dst_x = (offset % required_pitch) / bpp; + info.width = std::min(surface->get_surface_width(), required_width - info.dst_x); + info.height = std::min(surface->get_surface_height(), required_height - info.dst_y); + } + + result.push_back(info); + } + }; + + process_list_function(m_render_targets_storage, false); + process_list_function(m_depth_stencil_storage, true); + return result; + } }; } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index bcf2ad8fc8..156a4792d3 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -251,10 +251,22 @@ namespace rsx u32 address_range = 0; }; + struct copy_region_descriptor + { + image_resource_type src; + u16 src_x; + u16 src_y; + u16 dst_x; + u16 dst_y; + u16 w; + u16 h; + }; + struct deferred_subresource { image_resource_type external_handle = 0; std::array external_cubemap_sources; + std::vector sections_to_copy; u32 base_address = 0; u32 gcm_format = 0; u16 x = 0; @@ -262,6 +274,8 @@ namespace rsx u16 width = 0; u16 height = 0; bool is_cubemap = false; + bool is_copy_cmd = false; + bool update_cached = false; deferred_subresource() {} @@ -370,6 +384,8 @@ namespace rsx virtual void set_up_remap_vector(section_storage_type& section, const std::pair, std::array>& remap_vector) = 0; virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex) = 0; virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::array& sources) = 0; + virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy) = 0; + virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0; constexpr u32 get_block_size() const { return 0x1000000; } inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); } @@ -1214,14 +1230,19 @@ namespace rsx found_desc.width != desc.width || found_desc.height != desc.height) continue; + if (desc.update_cached) + update_image_contents(cmd, It->second.second, desc.external_handle, desc.width, desc.height); + return It->second.second; } image_view_type result = 0; - if (!desc.is_cubemap) - result = create_temporary_subresource_view(cmd, &desc.external_handle, desc.gcm_format, desc.x, desc.y, desc.width, desc.height); - else + if (desc.is_copy_cmd) + result = generate_atlas_from_images(cmd, desc.gcm_format, desc.width, desc.height, desc.sections_to_copy); + else if (desc.is_cubemap) result = generate_cubemap_from_images(cmd, desc.gcm_format, desc.width, desc.external_cubemap_sources); + else + result = create_temporary_subresource_view(cmd, &desc.external_handle, desc.gcm_format, desc.x, desc.y, desc.width, desc.height); if (result) { @@ -1238,7 +1259,7 @@ namespace rsx template sampled_image_descriptor process_framebuffer_resource(commandbuffer_type& cmd, render_target_type texptr, u32 texaddr, u32 gcm_format, surface_store_type& m_rtts, - u16 tex_width, u16 tex_height, rsx::texture_dimension_extended extended_dimension, bool is_depth) + u16 tex_width, u16 tex_height, u16 tex_pitch, rsx::texture_dimension_extended extended_dimension, bool is_depth) { const u32 format = gcm_format & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); const auto surface_width = texptr->get_surface_width(); @@ -1321,7 +1342,43 @@ namespace rsx scale_y = 0.f; } + if (internal_width > surface_width || internal_height > surface_height) + { + auto bpp = get_format_block_size_in_bytes(format); + auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp); + + if (overlapping.size() > 1) + { + const auto w = rsx::apply_resolution_scale(internal_width, true); + const auto h = rsx::apply_resolution_scale(internal_height, true); + + sampled_image_descriptor result = { texptr->get_surface(), texaddr, format, 0, 0, w, h, + texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, + rsx::texture_dimension_extended::texture_dimension_2d }; + + result.external_subresource_desc.is_copy_cmd = true; + result.external_subresource_desc.sections_to_copy.reserve(overlapping.size()); + + for (auto §ion : overlapping) + { + result.external_subresource_desc.sections_to_copy.push_back + ({ + section.surface->get_surface(), + rsx::apply_resolution_scale(section.src_x, true), + rsx::apply_resolution_scale(section.src_y, true), + rsx::apply_resolution_scale(section.dst_x, true), + rsx::apply_resolution_scale(section.dst_y, true), + rsx::apply_resolution_scale(section.width, true), + rsx::apply_resolution_scale(section.height, true) + }); + } + + return result; + } + } + bool requires_processing = surface_width > internal_width || surface_height > internal_height; + bool update_subresource_cache = false; if (!requires_processing) { //NOTE: The scale also accounts for sampling outside the RTT region, e.g render to one quadrant but send whole texture for sampling @@ -1343,6 +1400,7 @@ namespace rsx { LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); requires_processing = true; + update_subresource_cache = true; break; } else @@ -1362,6 +1420,7 @@ namespace rsx { LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr); requires_processing = true; + update_subresource_cache = true; } else { @@ -1376,8 +1435,12 @@ namespace rsx { const auto w = rsx::apply_resolution_scale(internal_width, true); const auto h = rsx::apply_resolution_scale(internal_height, true); - return{ texptr->get_surface(), texaddr, format, 0, 0, w, h, texture_upload_context::framebuffer_storage, + + sampled_image_descriptor result = { texptr->get_surface(), texaddr, format, 0, 0, w, h, texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d }; + + result.external_subresource_desc.update_cached = update_subresource_cache; + return result; } return{ texptr->get_view(), texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d }; @@ -1400,8 +1463,9 @@ namespace rsx const auto extended_dimension = tex.get_extended_texture_dimension(); u16 depth = 0; u16 tex_height = (u16)tex.height(); - u16 tex_pitch = tex.pitch(); const u16 tex_width = tex.width(); + u16 tex_pitch = is_compressed_format? (u16)(get_texture_size(tex) / tex_height) : tex.pitch(); //NOTE: Compressed textures dont have a real pitch (tex_size = (w*h)/6) + if (tex_pitch == 0) tex_pitch = tex_width * get_format_block_size_in_bytes(format); switch (extended_dimension) { @@ -1428,7 +1492,7 @@ namespace rsx { if (test_framebuffer(texaddr + texptr->raster_address_offset)) { - return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, extended_dimension, false); + return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, false); } else { @@ -1441,7 +1505,7 @@ namespace rsx { if (test_framebuffer(texaddr + texptr->raster_address_offset)) { - return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, extended_dimension, true); + return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, true); } else { @@ -1451,9 +1515,6 @@ namespace rsx } } - tex_pitch = is_compressed_format? (u16)(get_texture_size(tex) / tex_height) : tex_pitch; //NOTE: Compressed textures dont have a real pitch (tex_size = (w*h)/6) - if (tex_pitch == 0) tex_pitch = tex_width * get_format_block_size_in_bytes(format); - const bool unnormalized = (tex.format() & CELL_GCM_TEXTURE_UN) != 0; f32 scale_x = (unnormalized) ? (1.f / tex_width) : 1.f; f32 scale_y = (unnormalized) ? (1.f / tex_height) : 1.f; diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index ba796fe7cd..f88ceb90cb 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -654,7 +654,7 @@ namespace gl m_temporary_surfaces.resize(0); } - u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u16 x, u16 y, u16 width, u16 height) + u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u16 x, u16 y, u16 width, u16 height, bool copy = true) { u32 dst_id = 0; @@ -686,17 +686,20 @@ namespace gl m_temporary_surfaces.push_back(dst_id); - //Empty GL_ERROR - glGetError(); - - glCopyImageSubData(src_id, GL_TEXTURE_2D, 0, x, y, 0, - dst_id, dst_type, 0, 0, 0, 0, width, height, 1); - - //Check for error - if (GLenum err = glGetError()) + if (copy) { - LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); - return 0; + //Empty GL_ERROR + glGetError(); + + glCopyImageSubData(src_id, GL_TEXTURE_2D, 0, x, y, 0, + dst_id, dst_type, 0, 0, 0, 0, width, height, 1); + + //Check for error + if (GLenum err = glGetError()) + { + LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); + return 0; + } } return dst_id; @@ -815,6 +818,26 @@ namespace gl return dst_id; } + u32 generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy) override + { + const GLenum ifmt = gl::get_sized_internal_format(gcm_format); + auto result = create_temporary_subresource_impl(sections_to_copy.front().src, ifmt, GL_TEXTURE_2D, 0, 0, width, height, false); + + for (const auto ®ion : sections_to_copy) + { + glCopyImageSubData(region.src, GL_TEXTURE_2D, 0, region.src_x, region.src_y, 0, + result, GL_TEXTURE_2D, 0, region.dst_x, region.dst_y, 0, region.w, region.h, 1); + } + + return result; + } + + void update_image_contents(void*&, u32 dst, u32 src, u16 width, u16 height) override + { + glCopyImageSubData(src, GL_TEXTURE_2D, 0, 0, 0, 0, + dst, GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); + } + cached_texture_section* create_new_texture(void*&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags, const std::pair, std::array>& /*remap_vector*/) override diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index dd12f7652d..8425a797f1 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -564,7 +564,7 @@ namespace vk tex.destroy(); } - vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) + vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, bool copy = true) { VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; @@ -600,21 +600,24 @@ namespace vk VkImageSubresourceRange view_range = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 }; view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, view_type, dst_format, source->native_component_map, view_range)); - VkImageLayout old_src_layout = source->current_layout; + if (copy) + { + VkImageLayout old_src_layout = source->current_layout; - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - vk::change_image_layout(cmd, source, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + vk::change_image_layout(cmd, source, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { (s32)x, (s32)y, 0 }; - copy_rgn.dstOffset = { (s32)0, (s32)0, 0 }; - copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; - copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; - copy_rgn.extent = { w, h, 1 }; + VkImageCopy copy_rgn; + copy_rgn.srcOffset = { (s32)x, (s32)y, 0 }; + copy_rgn.dstOffset = { (s32)0, (s32)0, 0 }; + copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; + copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; + copy_rgn.extent = { w, h, 1 }; - vkCmdCopyImage(cmd, source->value, source->current_layout, image->value, image->current_layout, 1, ©_rgn); - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); - vk::change_image_layout(cmd, source, old_src_layout, subresource_range); + vkCmdCopyImage(cmd, source->value, source->current_layout, image->value, image->current_layout, 1, ©_rgn); + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); + vk::change_image_layout(cmd, source, old_src_layout, subresource_range); + } const u32 resource_memory = w * h * 4; //Rough approximate m_discardable_storage.push_back({ image, view }); @@ -706,6 +709,85 @@ namespace vk return m_discardable_storage.back().view.get(); } + vk::image_view* generate_atlas_from_images(vk::command_buffer& cmd, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy) override + { + auto result = create_temporary_subresource_view_impl(cmd, sections_to_copy.front().src, VK_IMAGE_TYPE_2D, + VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, false); + + VkImage dst = result->info.image; + VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; + + switch (sections_to_copy.front().src->info.format) + { + case VK_FORMAT_D16_UNORM: + aspect = VK_IMAGE_ASPECT_DEPTH_BIT; + break; + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + break; + } + + VkImageSubresourceRange subresource_range = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 }; + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + + for (const auto ®ion : sections_to_copy) + { + VkImageLayout old_src_layout = region.src->current_layout; + vk::change_image_layout(cmd, region.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); + + VkImageCopy copy_rgn; + copy_rgn.srcOffset = { region.src_x, region.src_y, 0 }; + copy_rgn.dstOffset = { region.dst_x, region.dst_y, 0 }; + copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; + copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; + copy_rgn.extent = { region.w, region.h, 1 }; + + vkCmdCopyImage(cmd, region.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, ©_rgn); + + vk::change_image_layout(cmd, region.src, old_src_layout, subresource_range); + } + + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); + return result; + } + + void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override + { + VkImage dst = dst_view->info.image; + VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; + + switch (src->info.format) + { + case VK_FORMAT_D16_UNORM: + aspect = VK_IMAGE_ASPECT_DEPTH_BIT; + break; + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + break; + } + + VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + + VkImageLayout old_src_layout = src->current_layout; + vk::change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); + + VkImageCopy copy_rgn; + copy_rgn.srcOffset = { 0, 0, 0 }; + copy_rgn.dstOffset = { 0, 0, 0 }; + copy_rgn.dstSubresource = { aspect & ~(VK_IMAGE_ASPECT_DEPTH_BIT), 0, 0, 1 }; + copy_rgn.srcSubresource = { aspect & ~(VK_IMAGE_ASPECT_DEPTH_BIT), 0, 0, 1 }; + copy_rgn.extent = { width, height, 1 }; + + vkCmdCopyImage(cmd, src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_rgn); + + vk::change_image_layout(cmd, src, old_src_layout, subresource_range); + vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); + } + cached_texture_section* create_new_texture(vk::command_buffer& cmd, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags, const std::pair, std::array>& remap_vector) override