From a5ed30a8c0292f58a03f9bd7b0dc3a7865f6abe4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 5 Apr 2019 14:39:43 +0300 Subject: [PATCH] rsx: Fixups for data cast operations via typeless transfer --- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 3 +- rpcs3/Emu/RSX/GL/GLTexture.cpp | 91 ++++++++++++++++++++++++++++ rpcs3/Emu/RSX/GL/GLTexture.h | 1 + rpcs3/Emu/RSX/GL/GLTextureCache.h | 31 +++++----- rpcs3/Emu/RSX/VK/VKFormats.cpp | 78 ++++++++++++++++++++++++ rpcs3/Emu/RSX/VK/VKFormats.h | 2 + rpcs3/Emu/RSX/VK/VKRenderTargets.h | 3 +- rpcs3/Emu/RSX/VK/VKTexture.cpp | 50 --------------- rpcs3/Emu/RSX/VK/VKTextureCache.h | 34 +++++------ 9 files changed, 207 insertions(+), 86 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 4e3624d5dd..c363857487 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -627,7 +627,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init else { // Mem cast, generate typeless xfer info - if (src_bpp != dst_bpp || aspect() != src_texture->aspect()) + if (!formats_are_bitcast_compatible((GLenum)get_internal_format(), (GLenum)src_texture->get_internal_format()) || + aspect() != src_texture->aspect()) { typeless_info.src_is_typeless = true; typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index f7a44b6e13..17276da052 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -615,6 +615,97 @@ namespace gl fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf); } + u32 get_format_texel_width(GLenum format) + { + switch (format) + { + case GL_R8: + return 1; + case GL_R32F: + case GL_RG16: + case GL_RG16F: + case GL_RGBA8: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case 
GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return 4; + case GL_R16: + case GL_RG8: + case GL_RGB565: + return 2; + case GL_RGBA16F: + return 8; + case GL_RGBA32F: + return 16; + case GL_DEPTH_COMPONENT16: + return 2; + case GL_DEPTH24_STENCIL8: + case GL_DEPTH32F_STENCIL8: + return 4; + default: + fmt::throw_exception("Unexpected internal format 0x%X" HERE, (u32)format); + } + } + + std::pair get_format_convert_flags(GLenum format) + { + switch (format) + { + case GL_R8: + case GL_RG8: + case GL_RGBA8: + return { false, 1 }; + case GL_R16: + case GL_RG16: + case GL_RG16F: + case GL_RGB565: + case GL_RGBA16F: + return { true, 2 }; + case GL_R32F: + case GL_RGBA32F: + return { true, 4 }; + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return { false, 4 }; + case GL_DEPTH_COMPONENT16: + return { true, 2 }; + case GL_DEPTH24_STENCIL8: + case GL_DEPTH32F_STENCIL8: + return { true, 4 }; + default: + fmt::throw_exception("Unexpected internal format 0x%X" HERE, (u32)format); + } + } + + bool formats_are_bitcast_compatible(GLenum format1, GLenum format2) + { + if (LIKELY(format1 == format2)) + { + return true; + } + + // Formats are compatible if the following conditions are met: + // 1. Texel sizes must match + // 2. Both formats require no transforms (basic memcpy) or... + // 3. 
Both formats have the same transform (e.g RG16_UNORM to RG16_SFLOAT, both are down and uploaded with a 2-byte byteswap) + + if (get_format_texel_width(format1) != get_format_texel_width(format2)) + { + return false; + } + + const auto transform_a = get_format_convert_flags(format1); + const auto transform_b = get_format_convert_flags(format2); + + if (transform_a.first == transform_b.first) + { + return !transform_a.first || (transform_a.second == transform_b.second); + } + + return false; + } + void copy_typeless(texture * dst, const texture * src) { GLsizeiptr src_mem = src->width() * src->height(); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 1d49af865a..9d891674aa 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -21,6 +21,7 @@ namespace gl viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type); + bool formats_are_bitcast_compatible(GLenum format1, GLenum format2); void copy_typeless(texture* dst, const texture* src); /** * is_swizzled - determines whether input bytes are in morton order diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 9aa39aaed3..178eea5b99 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -596,9 +596,10 @@ namespace gl u16 x, u16 y, u16 width, u16 height, const texture_channel_remap_t& remap, bool copy) { if (sized_internal_fmt == GL_NONE) + { sized_internal_fmt = gl::get_sized_internal_format(gcm_format); + } - const auto ifmt = static_cast(sized_internal_fmt); std::unique_ptr dst = std::make_unique(dst_type, width, height, 1, 1, sized_internal_fmt); if (copy) @@ -615,15 +616,9 @@ namespace gl } std::array swizzle; - if (!src || (GLenum)ifmt != sized_internal_fmt) + if (!src || (GLenum)src->get_internal_format() != sized_internal_fmt) { - if (src) - { - //Format mismatch - warn_once("GL format mismatch (data cast?). 
Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt); - } - - //Apply base component map onto the new texture if a data cast has been done + // Apply base component map onto the new texture if a data cast has been done swizzle = get_component_mapping(gcm_format, rsx::texture_create_flags::default_component_order); } else @@ -685,8 +680,8 @@ namespace gl if (!slice.src) continue; - const auto src_bpp = slice.src->pitch() / slice.src->width(); - const bool typeless = dst_bpp != src_bpp || dst_aspect != slice.src->aspect(); + const bool typeless = dst_aspect != slice.src->aspect() || + !formats_are_bitcast_compatible((GLenum)slice.src->get_internal_format(), (GLenum)dst_image->get_internal_format()); auto src_image = slice.src; auto src_x = slice.src_x; @@ -694,6 +689,7 @@ namespace gl if (UNLIKELY(typeless)) { + const auto src_bpp = slice.src->pitch() / slice.src->width(); const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp; tmp = std::make_unique(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, (GLenum)dst_image->get_internal_format()); @@ -842,10 +838,17 @@ namespace gl return result; } - void update_image_contents(gl::command_context&, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override + void update_image_contents(gl::command_context& cmd, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override { - glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, 0, 0, 0, - dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); + std::vector region = + {{ + src, + surface_transform::identity, + 0, 0, 0, 0, 0, + width, height, width, height + }}; + + copy_transfer_regions_impl(cmd, dst->image(), region); } cached_texture_section* create_new_texture(gl::command_context&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, diff --git a/rpcs3/Emu/RSX/VK/VKFormats.cpp b/rpcs3/Emu/RSX/VK/VKFormats.cpp index b17ca45216..4b7cba6e18 100644 --- 
a/rpcs3/Emu/RSX/VK/VKFormats.cpp +++ b/rpcs3/Emu/RSX/VK/VKFormats.cpp @@ -368,4 +368,82 @@ namespace vk fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format); } + + std::pair get_format_convert_flags(VkFormat format) + { + switch (format) + { + //8-bit + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_R8G8B8A8_UNORM: + return{ false, 1 }; + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + return{ true, 4 }; + //16-bit + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R16G16B16A16_SFLOAT: + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + return{ true, 2 }; + //32-bit + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SFLOAT: + return{ true, 4 }; + //DXT + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: + case VK_FORMAT_BC2_SRGB_BLOCK: + case VK_FORMAT_BC3_SRGB_BLOCK: + return{ false, 1 }; + //Depth + case VK_FORMAT_D16_UNORM: + return{ true, 2 }; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + return{ true, 4 }; + } + + fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format); + } + + bool formats_are_bitcast_compatible(VkFormat format1, VkFormat format2) + { + if (LIKELY(format1 == format2)) + { + return true; + } + + // Formats are compatible if the following conditions are met: + // 1. Texel sizes must match + // 2. Both formats require no transforms (basic memcpy) or... + // 3. 
Both formats have the same transform (e.g RG16_UNORM to RG16_SFLOAT, both are down and uploaded with a 2-byte byteswap) + + if (get_format_texel_width(format1) != get_format_texel_width(format2)) + { + return false; + } + + const auto transform_a = get_format_convert_flags(format1); + const auto transform_b = get_format_convert_flags(format2); + + if (transform_a.first == transform_b.first) + { + return !transform_a.first || (transform_a.second == transform_b.second); + } + + return false; + } } diff --git a/rpcs3/Emu/RSX/VK/VKFormats.h b/rpcs3/Emu/RSX/VK/VKFormats.h index 146664afd3..c26472f053 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.h +++ b/rpcs3/Emu/RSX/VK/VKFormats.h @@ -11,6 +11,8 @@ namespace vk VkFormat get_compatible_srgb_format(VkFormat rgb_format); u8 get_format_texel_width(VkFormat format); std::pair get_format_element_size(VkFormat format); + std::pair get_format_convert_flags(VkFormat format); + bool formats_are_bitcast_compatible(VkFormat format1, VkFormat format2); std::tuple get_min_filter_and_mip(rsx::texture_minify_filter min_filter); VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter); diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 7673869ed7..7288e7d540 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -118,7 +118,8 @@ namespace vk } else { - if (src_bpp != dst_bpp || src_texture->attachment_aspect_flag != attachment_aspect_flag) + if (!formats_are_bitcast_compatible(format(), src_texture->format()) || + src_texture->attachment_aspect_flag != attachment_aspect_flag) { typeless_info.src_is_typeless = true; typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index be1599526f..517b312c44 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -56,56 +56,6 @@ namespace vk } } - std::pair get_format_convert_flags(VkFormat 
format) - { - switch (format) - { - //8-bit - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8_SNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_R8G8B8A8_UNORM: - return{ false, 1 }; - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - return{ true, 4 }; - //16-bit - case VK_FORMAT_R16_UINT: - case VK_FORMAT_R16_SFLOAT: - case VK_FORMAT_R16_UNORM: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16_SFLOAT: - case VK_FORMAT_R16G16B16A16_SFLOAT: - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - case VK_FORMAT_R4G4B4A4_UNORM_PACK16: - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_R5G5B5A1_UNORM_PACK16: - return{ true, 2 }; - //32-bit - case VK_FORMAT_R32_UINT: - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SFLOAT: - return{ true, 4 }; - //DXT - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: - case VK_FORMAT_BC2_SRGB_BLOCK: - case VK_FORMAT_BC3_SRGB_BLOCK: - return{ false, 1 }; - //Depth - case VK_FORMAT_D16_UNORM: - return{ true, 2 }; - case VK_FORMAT_D32_SFLOAT_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - return{ true, 4 }; - } - - fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format); - } - void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region) { switch (src->format()) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index d12ecacd9d..4857e56c7b 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -506,8 +506,8 @@ namespace vk if (!section.src) continue; - const auto src_bpp = vk::get_format_texel_width(section.src->format()); - const bool typeless = section.src->aspect() != dst_aspect || src_bpp != dst_bpp; + const bool typeless = section.src->aspect() != dst_aspect || + !formats_are_bitcast_compatible(dst->format(), section.src->format()); 
section.src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); @@ -517,6 +517,7 @@ namespace vk src_image = vk::get_typeless_helper(dst->info.format, section.src_x + section.src_w, section.src_y + section.src_h); src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + const auto src_bpp = vk::get_format_texel_width(section.src->format()); const u16 convert_w = u16(section.src_w * dst_bpp) / src_bpp; const areai src_rect = coordi{{ section.src_x, section.src_y }, { convert_w, section.src_h }}; const areai dst_rect = coordi{{ section.src_x, section.src_y }, { section.src_w, section.src_h }}; @@ -874,25 +875,18 @@ namespace vk void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override { - VkImage dst = dst_view->info.image; - VkImageAspectFlags aspect = vk::get_aspect_flags(src->info.format); - VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + std::vector region = + {{ + src, + surface_transform::identity, + 0, 0, 0, 0, 0, + width, height, width, height + }}; - VkImageLayout old_src_layout = src->current_layout; - vk::change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); - - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { 0, 0, 0 }; - copy_rgn.dstOffset = { 0, 0, 0 }; - copy_rgn.dstSubresource = { aspect & ~(VK_IMAGE_ASPECT_DEPTH_BIT), 0, 0, 1 }; - copy_rgn.srcSubresource = { aspect & ~(VK_IMAGE_ASPECT_DEPTH_BIT), 0, 0, 1 }; - copy_rgn.extent = { width, height, 1 }; - - vkCmdCopyImage(cmd, src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_rgn); - - vk::change_image_layout(cmd, src, old_src_layout, subresource_range); - vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 
subresource_range); + auto dst = dst_view->image(); + dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + copy_transfer_regions_impl(cmd, dst, region); + dst->pop_layout(cmd); } cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,