diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 2f9d9b3044..45f09a78d7 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -3,6 +3,7 @@ #include "Utilities/GSL.h" #include "Emu/Memory/vm.h" #include "../GCM.h" +#include "../rsx_utils.h" #include namespace @@ -507,7 +508,7 @@ namespace rsx // u32 clip_x = clip_horizontal_reg; // u32 clip_y = clip_vertical_reg; - cache_tag++; + cache_tag = rsx::get_shared_tag(); m_memory_tree.clear(); // Make previous RTTs sampleable @@ -542,15 +543,11 @@ namespace rsx } /** - * Search for given address in stored color surface and returns it if size/format match. + * Search for given address in stored color surface * Return an empty surface_type otherwise. */ surface_type get_texture_from_render_target_if_applicable(u32 address) { - // TODO: Handle texture that overlaps one (or several) surface. - // Handle texture conversion - // FIXME: Disgaea 3 loading screen seems to use a subset of a surface. It's not properly handled here. - // Note: not const because conversions/resolve/... can happen auto It = m_render_targets_storage.find(address); if (It != m_render_targets_storage.end()) return Traits::get(It->second); @@ -558,12 +555,11 @@ namespace rsx } /** - * Search for given address in stored depth stencil surface and returns it if size/format match. + * Search for given address in stored depth stencil surface * Return an empty surface_type otherwise. */ surface_type get_texture_from_depth_stencil_if_applicable(u32 address) { - // TODO: Same as above although there wasn't any game using corner case for DS yet. 
auto It = m_depth_stencil_storage.find(address); if (It != m_depth_stencil_storage.end()) return Traits::get(It->second); @@ -723,7 +719,7 @@ namespace rsx invalidated_resources.push_back(std::move(It->second)); m_render_targets_storage.erase(It); - cache_tag++; + cache_tag = rsx::get_shared_tag(); return; } } @@ -741,7 +737,7 @@ namespace rsx invalidated_resources.push_back(std::move(It->second)); m_depth_stencil_storage.erase(It); - cache_tag++; + cache_tag = rsx::get_shared_tag(); return; } } @@ -768,7 +764,7 @@ namespace rsx invalidated_resources.push_back(std::move(It->second)); m_render_targets_storage.erase(It); - cache_tag++; + cache_tag = rsx::get_shared_tag(); return; } } @@ -781,7 +777,7 @@ namespace rsx invalidated_resources.push_back(std::move(It->second)); m_depth_stencil_storage.erase(It); - cache_tag++; + cache_tag = rsx::get_shared_tag(); return; } } @@ -1138,7 +1134,7 @@ namespace rsx void notify_memory_structure_changed() { - cache_tag++; + cache_tag = rsx::get_shared_tag(); } }; } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 4a794e8388..e5c50479a5 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -131,16 +131,25 @@ namespace rsx bool has_flushables = false; }; + enum surface_transform : u32 + { + identity = 0, + argb_to_bgra = 1 + }; + struct copy_region_descriptor { image_resource_type src; + surface_transform xform; u16 src_x; u16 src_y; u16 dst_x; u16 dst_y; u16 dst_z; - u16 w; - u16 h; + u16 src_w; + u16 src_h; + u16 dst_w; + u16 dst_h; }; enum deferred_request_command : u32 @@ -311,7 +320,7 @@ namespace rsx */ inline void update_cache_tag() { - m_cache_update_tag++; + m_cache_update_tag = rsx::get_shared_tag(); } template @@ -965,7 +974,7 @@ namespace rsx } - std::vector find_texture_from_range(const address_range &test_range) + std::vector find_texture_from_range(const address_range &test_range, u32 context_mask=0xFF) { std::vector results; @@ 
-977,8 +986,10 @@ namespace rsx //if (tex.get_section_base() > test_range.start) // continue; - if (!tex.is_dirty()) + if (!tex.is_dirty() && (context_mask & (u32)tex.get_context())) + { results.push_back(&tex); + } } return results; @@ -1376,7 +1387,15 @@ namespace rsx std::vector sections(6); for (u16 n = 0; n < 6; ++n) { - sections[n] = { desc.external_handle, 0, (u16)(desc.height * n), 0, 0, n, desc.width, desc.height }; + sections[n] = + { + desc.external_handle, + surface_transform::identity, + 0, (u16)(desc.height * n), + 0, 0, n, + desc.width, desc.height, + desc.width, desc.height + }; } result = generate_cubemap_from_images(cmd, desc.gcm_format, desc.width, sections, desc.remap); @@ -1393,7 +1412,15 @@ namespace rsx sections.resize(desc.depth); for (u16 n = 0; n < desc.depth; ++n) { - sections[n] = { desc.external_handle, 0, (u16)(desc.height * n), 0, 0, n, desc.width, desc.height }; + sections[n] = + { + desc.external_handle, + surface_transform::identity, + 0, (u16)(desc.height * n), + 0, 0, n, + desc.width, desc.height, + desc.width, desc.height + }; } result = generate_3d_from_2d_images(cmd, desc.gcm_format, desc.width, desc.height, desc.depth, sections, desc.remap); @@ -1455,16 +1482,19 @@ namespace rsx { section.surface->read_barrier(cmd); + const auto src_width = rsx::apply_resolution_scale(section.width, true), dst_width = src_width; + const auto src_height = rsx::apply_resolution_scale(section.height, true), dst_height = src_height; surfaces.push_back ({ section.surface->get_surface(), + surface_transform::identity, rsx::apply_resolution_scale(section.src_x, true), rsx::apply_resolution_scale(section.src_y, true), rsx::apply_resolution_scale(section.dst_x, true), rsx::apply_resolution_scale(section.dst_y, true), slice, - rsx::apply_resolution_scale(section.width, true), - rsx::apply_resolution_scale(section.height, true) + src_width, src_height, + dst_width, dst_height }); } } @@ -1554,52 +1584,141 @@ namespace rsx auto overlapping = 
m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp); bool requires_merging = false; - AUDIT(!overlapping.empty()); - if (overlapping.size() > 1) + verify(HERE), !overlapping.empty(); + if (LIKELY(overlapping.back().surface == texptr)) { - // The returned values are sorted with oldest first and newest last - // This allows newer data to overwrite older memory when merging the list - if (overlapping.back().surface == texptr) - { - // The texture 'proposed' by the previous lookup is the newest one - // If it occupies the entire requested region, just use it as-is - requires_merging = (internal_width > surface_width || internal_height > surface_height); - } - else - { - requires_merging = true; - } + // The texture 'proposed' by the previous lookup is the newest one + // If it occupies the entire requested region, just use it as-is + requires_merging = (internal_width > surface_width || internal_height > surface_height); + } + else + { + verify(HERE), overlapping.size() > 1; + requires_merging = true; } if (requires_merging) { - const auto w = rsx::apply_resolution_scale(internal_width, true); - const auto h = rsx::apply_resolution_scale(internal_height, true); + // TODO: For now we're only testing against blit engine dst, should add other types as wel + const auto range = rsx::address_range::start_length(texaddr, tex_pitch * tex_height); + auto local_resources = find_texture_from_range(range, rsx::texture_upload_context::blit_engine_dst); - sampled_image_descriptor result = { texptr->get_surface(), deferred_request_command::atlas_gather, - texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth, - scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; - - result.external_subresource_desc.sections_to_copy.reserve(overlapping.size()); - - for (auto §ion : overlapping) + if (local_resources.empty() && overlapping.size() == 1) { - section.surface->read_barrier(cmd); - - 
result.external_subresource_desc.sections_to_copy.push_back - ({ - section.surface->get_surface(), - rsx::apply_resolution_scale(section.src_x, true), - rsx::apply_resolution_scale(section.src_y, true), - rsx::apply_resolution_scale(section.dst_x, true), - rsx::apply_resolution_scale(section.dst_y, true), - 0, - rsx::apply_resolution_scale(section.width, true), - rsx::apply_resolution_scale(section.height, true) - }); + // TODO: Fall back to full upload and merge } + else + { + const auto w = rsx::apply_resolution_scale(internal_width, true); + const auto h = rsx::apply_resolution_scale(internal_height, true); - return result; + sampled_image_descriptor result = { texptr->get_surface(), deferred_request_command::atlas_gather, + texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth, + scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; + + result.external_subresource_desc.sections_to_copy.reserve(overlapping.size() + local_resources.size()); + + auto add_rtt_resource = [&](auto& section) + { + section.surface->read_barrier(cmd); + + const auto src_width = rsx::apply_resolution_scale(section.width, true), dst_width = src_width; + const auto src_height = rsx::apply_resolution_scale(section.height, true), dst_height = src_height; + result.external_subresource_desc.sections_to_copy.push_back + ({ + section.surface->get_surface(), + surface_transform::identity, + rsx::apply_resolution_scale(section.src_x, true), + rsx::apply_resolution_scale(section.src_y, true), + rsx::apply_resolution_scale(section.dst_x, true), + rsx::apply_resolution_scale(section.dst_y, true), + 0, + src_width, src_height, + dst_width, dst_height + }); + }; + + auto add_local_resource = [&](auto& section) + { + // Intersect this resource with the original one + const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format()); + const auto clipped = rsx::intersect_region(texaddr, tex_width, tex_height, bpp, + 
section->get_section_base(), section->get_width(), section->get_height(), section_bpp, tex_pitch); + + // Since output is upscaled, also upscale on dst + result.external_subresource_desc.sections_to_copy.push_back + ({ + section->get_raw_texture(), + is_depth ? surface_transform::identity : surface_transform::argb_to_bgra, + (u16)std::get<0>(clipped).x, + (u16)std::get<0>(clipped).y, + rsx::apply_resolution_scale((u16)std::get<1>(clipped).x, true), + rsx::apply_resolution_scale((u16)std::get<1>(clipped).y, true), + 0, + (u16)std::get<2>(clipped).width, + (u16)std::get<2>(clipped).height, + rsx::apply_resolution_scale((u16)std::get<2>(clipped).width, true), + rsx::apply_resolution_scale((u16)std::get<2>(clipped).height, true), + }); + }; + + if (LIKELY(local_resources.empty())) + { + for (auto §ion : overlapping) + { + add_rtt_resource(section); + } + } + else + { + // Need to preserve sorting order + struct sort_helper + { + u64 tag; // Timestamp + u32 list; // List source, 0 = fbo, 1 = local + u32 index; // Index in list + }; + + std::vector sort_list; + sort_list.reserve(overlapping.size() + local_resources.size()); + + for (u32 index = 0; index < overlapping.size(); ++index) + { + sort_list.push_back({ overlapping[index].surface->last_use_tag, 0, index }); + } + + for (u32 index = 0; index < local_resources.size(); ++index) + { + if (local_resources[index]->get_rsx_pitch() != tex_pitch) + continue; + + // TODO: Typeless transfers + if (local_resources[index]->is_depth_texture() != is_depth) + continue; + + sort_list.push_back({ local_resources[index]->last_write_tag, 1, index }); + } + + std::sort(sort_list.begin(), sort_list.end(), [](const auto &a, const auto &b) + { + return (a.tag < b.tag); + }); + + for (const auto &e : sort_list) + { + if (e.list == 0) + { + add_rtt_resource(overlapping[e.index]); + } + else + { + add_local_resource(local_resources[e.index]); + } + } + } + + return result; + } } bool requires_processing = surface_width > internal_width || 
surface_height > internal_height; @@ -1688,7 +1807,8 @@ namespace rsx { // Check for sampleable rtts from previous render passes // TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block - if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) + if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr); + texptr && texptr->get_rsx_pitch() == tex_pitch) { if (const bool is_active = m_rtts.address_is_bound(texaddr, false); is_active || texptr->test()) @@ -1704,7 +1824,8 @@ namespace rsx } } - if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) + if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); + texptr && texptr->get_rsx_pitch() == tex_pitch) { if (const bool is_active = m_rtts.address_is_bound(texaddr, true); is_active || texptr->test()) @@ -1797,13 +1918,12 @@ namespace rsx if (is_hw_blit_engine_compatible(format)) { //Find based on range instead - auto overlapping_surfaces = find_texture_from_range(tex_range); + auto overlapping_surfaces = find_texture_from_range(tex_range, rsx::texture_upload_context::blit_engine_dst); if (!overlapping_surfaces.empty()) { for (const auto &surface : overlapping_surfaces) { - if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst || - !surface->overlaps(tex_range, rsx::section_bounds::confirmed_range)) + if (!surface->overlaps(tex_range, rsx::section_bounds::confirmed_range)) continue; if (surface->get_width() >= tex_width && surface->get_height() >= tex_height) @@ -1893,7 +2013,7 @@ namespace rsx u16 dst_h = dst.clip_height; //Check if src/dst are parts of render targets - auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, false, false); + auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, 
dst.pitch, false, false, false); dst_is_render_target = dst_subres.surface != nullptr; if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch) @@ -2027,13 +2147,10 @@ namespace rsx if (!dst_is_render_target) { // Check for any available region that will fit this one - auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height)); + auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height), rsx::texture_upload_context::blit_engine_dst); for (const auto &surface : overlapping_surfaces) { - if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst) - continue; - if (surface->get_rsx_pitch() != dst.pitch) continue; diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 4331f7f077..989834453d 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -733,6 +733,32 @@ namespace gl } } + void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector& sources) + { + for (const auto &slice : sources) + { + if (!slice.src) + continue; + + if (slice.src_w == slice.dst_w && slice.src_h == slice.dst_h) + { + glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, + dst_image->id(), (GLenum)dst_image->get_target(), 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.src_w, slice.src_h, 1); + } + else + { + verify(HERE), dst_image->get_target() == gl::texture::target::texture2D; + + auto _blitter = gl::g_hw_blitter; + const areai src_rect = { slice.src_x, slice.src_y, slice.src_x + slice.src_w, slice.src_y + slice.src_h }; + const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h }; + + _blitter->scale_image(cmd, slice.src, dst_image, + src_rect, dst_rect, false, false, {}); + } + } + } + protected: gl::texture_view* 
create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, @@ -748,7 +774,7 @@ namespace gl GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); } - gl::texture_view* generate_cubemap_from_images(gl::command_context&, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override + gl::texture_view* generate_cubemap_from_images(gl::command_context& cmd, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override { const GLenum ifmt = gl::get_sized_internal_format(gcm_format); std::unique_ptr dst_image = std::make_unique(GL_TEXTURE_CUBE_MAP, size, size, 1, 1, ifmt); @@ -757,14 +783,7 @@ namespace gl //Empty GL_ERROR glGetError(); - for (const auto &slice : sources) - { - if (slice.src) - { - glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, - dst_image->id(), GL_TEXTURE_CUBE_MAP, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1); - } - } + copy_transfer_regions_impl(cmd, dst_image.get(), sources); if (GLenum err = glGetError()) { @@ -777,7 +796,7 @@ namespace gl return result; } - gl::texture_view* generate_3d_from_2d_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override + gl::texture_view* generate_3d_from_2d_images(gl::command_context& cmd, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override { const GLenum ifmt = gl::get_sized_internal_format(gcm_format); std::unique_ptr dst_image = std::make_unique(GL_TEXTURE_3D, width, height, depth, 1, ifmt); @@ -786,14 +805,7 @@ namespace gl //Empty GL_ERROR glGetError(); - for (const auto &slice : sources) - { - if (slice.src) - { - glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, - 
dst_image->id(), GL_TEXTURE_3D, 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.w, slice.h, 1); - } - } + copy_transfer_regions_impl(cmd, dst_image.get(), sources); if (GLenum err = glGetError()) { @@ -806,17 +818,12 @@ namespace gl return result; } - gl::texture_view* generate_atlas_from_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, + gl::texture_view* generate_atlas_from_images(gl::command_context& cmd, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, const texture_channel_remap_t& remap_vector) override { auto result = create_temporary_subresource_impl(nullptr, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); - for (const auto &region : sections_to_copy) - { - glCopyImageSubData(region.src->id(), GL_TEXTURE_2D, 0, region.src_x, region.src_y, 0, - result->image()->id(), GL_TEXTURE_2D, 0, region.dst_x, region.dst_y, 0, region.w, region.h, 1); - } - + copy_transfer_regions_impl(cmd, result->image(), sections_to_copy); return result; } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 79b10bf189..87ebef33b1 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -496,6 +496,89 @@ namespace vk return mapping; } + void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector& sections_to_transfer) + { + for (const auto &section : sections_to_transfer) + { + if (!section.src) + continue; + + VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst->info.format); + VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); + VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; + + if (section.src_w == section.dst_w && section.src_h == section.dst_h && + section.xform == surface_transform::identity) + { + VkImageLayout old_src_layout = section.src->current_layout; + VkImageCopy copy_rgn; + + copy_rgn.srcOffset = { section.src_x,
section.src_y, 0 }; + copy_rgn.dstOffset = { section.dst_x, section.dst_y, (dst->info.imageType == VK_IMAGE_TYPE_3D) ? section.dst_z : 0 }; // 3D targets select the destination slice through the z offset + copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, (dst->info.imageType == VK_IMAGE_TYPE_3D) ? 0u : section.dst_z, 1 }; // Layered (cube/array) targets select it through the base array layer + copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; + copy_rgn.extent = { section.src_w, section.src_h, 1 }; + + vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); + vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, &copy_rgn); + vk::change_image_layout(cmd, section.src, old_src_layout, src_range); + } + else + { + verify(HERE), section.dst_z == 0; + if (section.xform == surface_transform::identity) + { + vk::copy_scaled_image(cmd, section.src->value, dst->value, section.src->current_layout, dst->current_layout, + section.src_x, section.src_y, section.src_w, section.src_h, + section.dst_x, section.dst_y, section.dst_w, section.dst_h, + 1, src_aspect, section.src->info.format == dst->info.format, + VK_FILTER_NEAREST); + } + else if (section.xform == surface_transform::argb_to_bgra) + { + VkImageLayout old_src_layout = section.src->current_layout; + VkBufferImageCopy copy{}; + + copy.imageExtent = { section.src_w, section.src_h, 1 }; + copy.imageOffset = { section.src_x, section.src_y, 0 }; + copy.imageSubresource = { src_aspect, 0, 0, 1 }; + + auto scratch_buf = vk::get_scratch_buffer(); + vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); + vkCmdCopyImageToBuffer(cmd, section.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, scratch_buf->value, 1, &copy); + + const auto length = section.src->width() * section.src->height() * 4; // Bytes to shuffle: whole image extent at 4 bytes per texel (NOTE(review): assumes a 32-bit texel format) + vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + + auto shuffle_kernel = vk::get_compute_task(); + shuffle_kernel->run(cmd, scratch_buf, length); + +
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); + + auto tmp = vk::get_typeless_helper(section.src->info.format, section.dst_x + section.dst_w, section.dst_y + section.dst_h); + vk::change_image_layout(cmd, tmp, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { src_aspect, 0, 1, 0, 1 }); + copy.imageOffset = { 0, 0, 0 }; + + vkCmdCopyBufferToImage(cmd, scratch_buf->value, tmp->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy); + + vk::copy_scaled_image(cmd, tmp->value, dst->value, tmp->current_layout, dst->current_layout, + 0, 0, section.src_w, section.src_h, + section.dst_x, section.dst_y, section.dst_w, section.dst_h, + 1, src_aspect, section.src->info.format == dst->info.format, + VK_FILTER_NEAREST); + + vk::change_image_layout(cmd, section.src, old_src_layout, src_range); + } + else + { + fmt::throw_exception("Unreachable" HERE); + } + } + } + } + protected: vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy) @@ -627,26 +710,7 @@ namespace vk vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); } - for (const auto &section : sections_to_copy) - { - if (section.src) - { - VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); - VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; - VkImageLayout old_src_layout = section.src->current_layout; - vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); - - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { section.src_x, section.src_y, 0 }; - copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 }; - copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0,
section.dst_z, 1 }; - copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.extent = { section.w, section.h, 1 }; - - vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, image->value, image->current_layout, 1, ©_rgn); - vk::change_image_layout(cmd, section.src, old_src_layout, src_range); - } - } + copy_transfer_regions_impl(cmd, image.get(), sections_to_copy); vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); @@ -690,26 +754,7 @@ namespace vk vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); } - for (const auto §ion : sections_to_copy) - { - if (section.src) - { - VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); - VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; - VkImageLayout old_src_layout = section.src->current_layout; - vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); - - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { section.src_x, section.src_y, 0 }; - copy_rgn.dstOffset = { section.dst_x, section.dst_y, section.dst_z }; - copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.extent = { section.w, section.h, 1 }; - - vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, image->value, image->current_layout, 1, ©_rgn); - vk::change_image_layout(cmd, section.src, old_src_layout, src_range); - } - } + copy_transfer_regions_impl(cmd, image.get(), sections_to_copy); vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); @@ -727,32 +772,13 @@ namespace vk auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false); - VkImage dst = result->info.image; 
VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); + vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); - for (const auto ®ion : sections_to_copy) - { - VkImageAspectFlags src_aspect = vk::get_aspect_flags(region.src->info.format); - VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; - VkImageLayout old_src_layout = region.src->current_layout; - vk::change_image_layout(cmd, region.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); + copy_transfer_regions_impl(cmd, result->image(), sections_to_copy); - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { region.src_x, region.src_y, 0 }; - copy_rgn.dstOffset = { region.dst_x, region.dst_y, 0 }; - copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.extent = { region.w, region.h, 1 }; - - vkCmdCopyImage(cmd, region.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, ©_rgn); - - vk::change_image_layout(cmd, region.src, old_src_layout, src_range); - } - - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); + vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); return result; } diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index c88638671a..c875888112 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "rsx_utils.h" #include "rsx_methods.h" #include "RSXThread.h" @@ -14,6 +14,8 @@ extern "C" namespace rsx { + atomic_t g_rsx_shared_tag{ 0 }; + void 
convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch, const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear) { diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 936f9918d6..8c5b0733e6 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -30,6 +30,7 @@ namespace rsx // Definitions class thread; extern thread* g_current_renderer; + extern atomic_t g_rsx_shared_tag; //Base for resources with reference counting struct ref_counted @@ -163,6 +164,12 @@ namespace rsx return static_cast((1ULL << 32) >> utils::cntlz32(x - 1, true)); } + // Returns an ever-increasing tag value + static inline u64 get_shared_tag() + { + return g_rsx_shared_tag++; + } + // Copy memory in inverse direction from source // Used to scale negatively x axis while transfering image data template @@ -411,6 +418,40 @@ namespace rsx return std::make_tuple(x, y, width, height); } + // Computes the overlap of a dst image and a src image that are laid out with the same row pitch + // Returns (overlap origin inside src, overlap origin inside dst, overlap size); x, y, w and h are in texels + // NOTE(review): assumes the two images actually overlap; the unsigned subtractions wrap around otherwise + static inline std::tuple intersect_region( + u32 dst_address, u16 dst_w, u16 dst_h, u16 dst_bpp, + u32 src_address, u16 src_w, u16 src_h, u32 src_bpp, + u32 pitch) + { + if (src_address < dst_address) + { + const auto offset = dst_address - src_address; + const auto src_y = (offset / pitch); + const auto src_x = (offset % pitch) / src_bpp; + const auto dst_x = 0u; + const auto dst_y = 0u; + const auto w = std::min(dst_w, src_w - src_x); + const auto h = std::min(dst_h, src_h - src_y); + + return std::make_tuple({ src_x, src_y }, { dst_x, dst_y }, { w, h }); + } + else + { + const auto offset = src_address - dst_address; // src begins at or after dst here, so this is the non-negative byte offset of src inside dst + const auto src_x = 0u; + const auto src_y = 0u; + const auto dst_y = (offset / pitch); + const auto dst_x = (offset % pitch) / dst_bpp; + const auto w = std::min(src_w, dst_w - dst_x); + const auto h = std::min(src_h, dst_h - dst_y); + + return std::make_tuple({ src_x, src_y }, { dst_x, dst_y }, { w, h }); + } + } + static inline const f32
get_resolution_scale() { return g_cfg.video.strict_rendering_mode? 1.f : ((f32)g_cfg.video.resolution_scale_percent / 100.f);