From cc7848b3ef7b045d892f6fbbdc10c8108751c914 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 26 Jul 2018 20:52:22 +0300 Subject: [PATCH] vulkan: Fix blit engine transfer to ARGB8 render target memory --- rpcs3/Emu/RSX/Common/texture_cache.h | 8 ++++++ rpcs3/Emu/RSX/VK/VKHelpers.cpp | 2 +- rpcs3/Emu/RSX/VK/VKTextureCache.h | 43 +++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index eaea72bff2..3e70490d94 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -33,6 +33,8 @@ namespace rsx u32 dst_gcm_format = 0; f32 src_scaling_hint = 1.f; f32 dst_scaling_hint = 1.f; + texture_upload_context src_context = texture_upload_context::blit_engine_src; + texture_upload_context dst_context = texture_upload_context::blit_engine_dst; void analyse() { @@ -2125,6 +2127,7 @@ namespace rsx if (cached_dest) { dest_texture = cached_dest->get_raw_texture(); + typeless_info.dst_context = cached_dest->get_context(); max_dst_width = cached_dest->get_width(); max_dst_height = cached_dest->get_height(); @@ -2144,6 +2147,7 @@ namespace rsx dst_area.y2 += dst_subres.y; dest_texture = dst_subres.surface->get_surface(); + typeless_info.dst_context = texture_upload_context::framebuffer_storage; max_dst_width = (u16)(dst_subres.surface->get_surface_width() * typeless_info.dst_scaling_hint); max_dst_height = dst_subres.surface->get_surface_height(); @@ -2179,6 +2183,7 @@ namespace rsx src_area.y2 <= surface->get_height()) { vram_texture = surface->get_raw_texture(); + typeless_info.src_context = surface->get_context(); break; } @@ -2206,6 +2211,7 @@ namespace rsx subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled)->get_raw_texture(); m_texture_memory_in_use += src.pitch * src.slice_h; + typeless_info.src_context = texture_upload_context::blit_engine_src; } } else @@ -2232,6 +2238,7 @@ namespace rsx 
src_area.y2 += src_subres.y; vram_texture = src_subres.surface->get_surface(); + typeless_info.src_context = texture_upload_context::framebuffer_storage; } const bool src_is_depth = src_subres.is_depth_surface; @@ -2342,6 +2349,7 @@ namespace rsx channel_order); dest_texture = cached_dest->get_raw_texture(); + typeless_info.dst_context = texture_upload_context::blit_engine_dst; m_texture_memory_in_use += dst.pitch * dst_dimensions.height; } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 47c0a568e1..8fb8726be8 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -195,7 +195,7 @@ namespace vk if (!g_scratch_buffer) { // 32M disposable scratch memory - g_scratch_buffer = std::make_unique(*g_current_renderer, 32 * 0x100000, + g_scratch_buffer = std::make_unique(*g_current_renderer, 64 * 0x100000, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0); } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index aaffdd03a7..dd42bca763 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -1129,8 +1129,49 @@ namespace vk vk::copy_image_typeless(*commands, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1, vk::get_aspect_flags(dst->info.format), vk::get_aspect_flags(format)); } + else if (xfer_info.dst_context == rsx::texture_upload_context::framebuffer_storage) + { + if (xfer_info.src_context != rsx::texture_upload_context::blit_engine_dst && + xfer_info.src_context != rsx::texture_upload_context::framebuffer_storage) + { + // Data moving to rendertarget, where byte ordering has to be preserved + // NOTE: This is a workaround, true accuracy would require all RTT<->cache transfers to invoke this step but that's too slow + // Sampling 
is ok; image view swizzle will work around it + if (dst->info.format == VK_FORMAT_B8G8R8A8_UNORM) + { + // For this specific format, channel ordering is faked via custom remap, undo this before transfer + VkBufferImageCopy copy{}; + copy.imageExtent = src->info.extent; + copy.imageOffset = { 0, 0, 0 }; + copy.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; - //Checks + const auto scratch_buf = vk::get_scratch_buffer(); + const auto data_length = src->info.extent.width * src->info.extent.height * 4; + + const auto current_layout = src->current_layout; + vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + vkCmdCopyImageToBuffer(*commands, src->value, src->current_layout, scratch_buf->value, 1, &copy); + vk::change_image_layout(*commands, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + + vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + + vk::get_compute_task()->run(*commands, scratch_buf, data_length); + + vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); + + real_src = vk::get_typeless_helper(src->info.format); + vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}); + + vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy); + } + } + } + + // Checks if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1) { LOG_ERROR(RSX, "Blit request consists of an empty region descriptor!");