From 366e4c24228c61a3997397cafce711c3b3c3e6ac Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 29 Mar 2019 22:04:54 +0300 Subject: [PATCH] rsx: Preliminary support for format conversions using typeless resolve --- rpcs3/Emu/RSX/Common/texture_cache.h | 130 ++++++-- rpcs3/Emu/RSX/GL/GLHelpers.h | 24 +- rpcs3/Emu/RSX/GL/GLTextureCache.h | 82 ++--- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 3 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 430 +++++++++++++++------------ rpcs3/Emu/RSX/VK/VKTextureCache.h | 154 +++++----- 6 files changed, 481 insertions(+), 342 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 964e002f31..4cef5efb73 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -951,14 +951,14 @@ namespace rsx } protected: - inline bool is_hw_blit_engine_compatible(u32 format) const + inline bool is_gcm_depth_format(u32 format) const { switch (format) { - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_R5G6B5: case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return true; default: return false; @@ -976,12 +976,12 @@ namespace rsx case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_R6G5B5: + //case CELL_GCM_TEXTURE_A4R4G4B4: + //case CELL_GCM_TEXTURE_G8B8: + //case CELL_GCM_TEXTURE_A1R5G5B5: + //case CELL_GCM_TEXTURE_R5G5B5A1: + //case CELL_GCM_TEXTURE_R5G6B5: + //case CELL_GCM_TEXTURE_R6G5B5: return CELL_GCM_TEXTURE_DEPTH16; } @@ -1731,8 +1731,9 @@ namespace rsx // Intersect this resource with the original one const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format()); + const auto normalized_width = (section->get_width() * section_bpp) / bpp; const auto clipped = rsx::intersect_region(address, slice_w, slice_h, bpp, - section->get_section_base(), section->get_width(), section->get_height(), section_bpp, pitch); + section->get_section_base(), normalized_width, section->get_height(), section_bpp, pitch); const auto slice_begin = u32(slice * src_slice_h); const auto slice_end = u32(slice_begin + slice_h); @@ -1747,6 +1748,7 @@ namespace rsx return; } + const u16 internal_clip_width = u16(std::get<2>(clipped).width * bpp) / section_bpp; if (scaling) { // Since output is upscaled, also upscale on dst @@ -1759,15 +1761,15 @@ namespace rsx rsx::apply_resolution_scale((u16)std::get<1>(clipped).x, true), rsx::apply_resolution_scale((u16)std::get<1>(clipped).y, true), slice, - (u16)std::get<2>(clipped).width, + internal_clip_width, (u16)std::get<2>(clipped).height, - rsx::apply_resolution_scale((u16)std::get<2>(clipped).width, true), + rsx::apply_resolution_scale(internal_clip_width, true), rsx::apply_resolution_scale((u16)std::get<2>(clipped).height, true), }); } else { - const auto src_width = (u16)std::get<2>(clipped).width, dst_width = src_width; + const auto src_width = internal_clip_width, dst_width = src_width; const auto src_height = (u16)std::get<2>(clipped).height, dst_height = src_height; surfaces.push_back ({ @@ -1893,7 +1895,6 @@ namespace rsx { texptr->read_barrier(cmd); - const bool is_depth = texptr->is_depth_surface(); const auto surface_width = texptr->get_surface_width(); const auto surface_height = texptr->get_surface_height(); @@ -1901,6 +1902,25 @@ namespace rsx u32 internal_height = tex_height; get_native_dimensions(internal_width, internal_height, texptr); + bool is_depth = texptr->is_depth_surface(); + const bool force_convert = !render_target_format_is_compatible(texptr, format); + + if (const bool gcm_format_is_depth = is_gcm_depth_format(format); + gcm_format_is_depth != is_depth) + { + if (force_convert) + { + is_depth = gcm_format_is_depth; + } + else + { + format = get_compatible_depth_format(format); + } + + // Always make sure the conflict is resolved! + verify(HERE), is_gcm_depth_format(format) == is_depth; + } + if (LIKELY(extended_dimension == rsx::texture_dimension_extended::texture_dimension_2d || extended_dimension == rsx::texture_dimension_extended::texture_dimension_1d)) { @@ -1912,12 +1932,12 @@ namespace rsx if ((assume_bound && g_cfg.video.strict_rendering_mode) || internal_width < surface_width || internal_height < surface_height || - !render_target_format_is_compatible(texptr, format)) + force_convert) { const auto scaled_w = rsx::apply_resolution_scale(internal_width, true); const auto scaled_h = rsx::apply_resolution_scale(internal_height, true); - auto command = assume_bound ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; + const auto command = assume_bound ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; return { texptr->get_surface(), command, texaddr, format, 0, 0, scaled_w, scaled_h, 1, texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, extended_dimension, decoded_remap }; @@ -1965,7 +1985,16 @@ namespace rsx if (is_depth = (select_hint == 0) ? fbos.back().is_depth : local.back()->is_depth_texture(); is_depth) { - format = get_compatible_depth_format(format); + if (const auto suggested_format = get_compatible_depth_format(format); + !is_gcm_depth_format(suggested_format)) + { + // Failed! + is_depth = false; + } + else + { + format = suggested_format; + } } // If this method was called, there is no easy solution, likely means atlas gather is needed @@ -2147,13 +2176,14 @@ namespace rsx { // Surface cache data is newer, check if this thing fits our search parameters const auto& last = overlapping_fbos.back(); - if (last.src_x == 0 && last.src_y == 0 && last.surface->get_bpp() == bpp) + if (last.src_x == 0 && last.src_y == 0) { u16 internal_width = tex_width; u16 internal_height = required_surface_height; get_native_dimensions(internal_width, internal_height, last.surface); - if (last.width >= internal_width && last.height >= internal_height) + u16 normalized_width = u16(last.width * last.surface->get_bpp()) / bpp; + if (normalized_width >= internal_width && last.height >= internal_height) { return process_framebuffer_resource_fast(cmd, last.surface, texaddr, format, tex_width, tex_height, depth, scale_x, scale_y, extended_dimension, tex.remap(), tex.decoded_remap(), false); @@ -2163,12 +2193,66 @@ namespace rsx else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d) { const auto last = overlapping_locals.back(); + const auto normalized_width = u16(last->get_width() * get_format_block_size_in_bytes(last->get_gcm_format())) / bpp; + if (last->get_section_base() == texaddr && - get_format_block_size_in_bytes(last->get_gcm_format()) == bpp && - last->get_width() >= tex_width && last->get_height() >= tex_height) + normalized_width >= tex_width && last->get_height() >= tex_height) { - return { last->get_raw_texture(), deferred_request_command::copy_image_static, texaddr, format, 0, 0, - tex_width, tex_height, 1, last->get_context(), last->is_depth_texture(), + bool is_depth = last->is_depth_texture(); + u32 gcm_format = format; + + if (const auto gcm_format_is_depth = is_gcm_depth_format(format); + is_depth != gcm_format_is_depth) + { + // Conflict, resolve + if (gcm_format_is_depth) + { + is_depth = true; + } + else + { + const auto actual_format = last->get_gcm_format(); + bool resolved = false; + + switch (format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + { + // Compatible with D24S8_UINT + if (actual_format == CELL_GCM_TEXTURE_DEPTH24_D8) + { + gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8; + resolved = true; + is_depth = true; + } + break; + } + case CELL_GCM_TEXTURE_X16: + { + // Compatible with DEPTH16_UNORM + if (actual_format == CELL_GCM_TEXTURE_DEPTH16) + { + gcm_format = CELL_GCM_TEXTURE_DEPTH16; + resolved = true; + is_depth = true; + } + break; + } + } + + if (!resolved) + { + LOG_ERROR(RSX, "Reading texture with gcm format 0x%x as unexpected cast with format 0x%x", + actual_format, format); + + is_depth = gcm_format_is_depth; + } + } + } + + return { last->get_raw_texture(), deferred_request_command::copy_image_static, texaddr, gcm_format, 0, 0, + tex_width, tex_height, 1, last->get_context(), is_depth, scale_x, scale_y, extended_dimension, tex.decoded_remap() }; } } diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 48b848deb8..6c99b8c9ee 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -1322,6 +1322,13 @@ namespace gl } }; + enum image_aspect : u32 + { + color = 1, + depth = 2, + stencil = 4 + }; + class texture { public: @@ -1479,6 +1486,7 @@ namespace gl GLuint m_mipmaps = 0; GLuint m_pitch = 0; GLuint m_compressed = GL_FALSE; + GLuint m_aspect_flags = 0; target m_target = target::texture2D; internal_format m_internal_format = internal_format::rgba8; @@ -1563,18 +1571,21 @@ namespace gl m_height = height; m_depth = depth; m_mipmaps = mipmaps; + m_aspect_flags = image_aspect::color; switch (sized_format) { case GL_DEPTH_COMPONENT16: { m_pitch = width * 2; + m_aspect_flags = image_aspect::depth; break; } case GL_DEPTH24_STENCIL8: case GL_DEPTH32F_STENCIL8: { m_pitch = width * 4; + m_aspect_flags = image_aspect::depth | image_aspect::stencil; break; } case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: @@ -1688,6 +1699,11 @@ namespace gl return m_compressed; } + GLuint aspect() const + { + return m_aspect_flags; + } + sizei size2D() const { return{ (int)m_width, (int)m_height }; @@ -1800,13 +1816,6 @@ namespace gl } }; - enum image_aspect : u32 - { - color = 1, - depth = 2, - stencil = 4 - }; - class texture_view { GLuint m_id = 0; @@ -1950,6 +1959,7 @@ public: } } + verify(HERE), aspect() & aspect_flags; auto mapping = apply_swizzle_remap(get_native_component_layout(), remap); auto view = std::make_unique(this, mapping.data(), aspect_flags); auto result = view.get(); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 2348d608b7..9aa39aaed3 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -592,43 +592,26 @@ namespace gl m_temporary_surfaces.resize(0); } - gl::texture_view* create_temporary_subresource_impl(gl::texture* src, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format, + gl::texture_view* create_temporary_subresource_impl(gl::command_context& cmd, gl::texture* src, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format, u16 x, u16 y, u16 width, u16 height, const texture_channel_remap_t& remap, bool copy) { if (sized_internal_fmt == GL_NONE) sized_internal_fmt = gl::get_sized_internal_format(gcm_format); - gl::texture::internal_format ifmt = static_cast(sized_internal_fmt); - if (src) - { - ifmt = src->get_internal_format(); - switch (ifmt) - { - case gl::texture::internal_format::depth16: - case gl::texture::internal_format::depth24_stencil8: - case gl::texture::internal_format::depth32f_stencil8: - //HACK! Should use typeless transfer instead - sized_internal_fmt = (GLenum)ifmt; - break; - } - } - + const auto ifmt = static_cast(sized_internal_fmt); std::unique_ptr dst = std::make_unique(dst_type, width, height, 1, 1, sized_internal_fmt); if (copy) { - //Empty GL_ERROR - glGetError(); + std::vector region = + {{ + src, + surface_transform::identity, + x, y, 0, 0, 0, + width, height, width, height + }}; - glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, x, y, 0, - dst->id(), dst_type, 0, 0, 0, 0, width, height, 1); - - //Check for error - if (GLenum err = glGetError()) - { - LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err); - return nullptr; - } + copy_transfer_regions_impl(cmd, dst.get(), region); } std::array swizzle; @@ -694,37 +677,56 @@ namespace gl void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector& sources) const { + const auto dst_bpp = dst_image->pitch() / dst_image->width(); + const auto dst_aspect = dst_image->aspect(); + for (const auto &slice : sources) { if (!slice.src) continue; + const auto src_bpp = slice.src->pitch() / slice.src->width(); + const bool typeless = dst_bpp != src_bpp || dst_aspect != slice.src->aspect(); + + auto src_image = slice.src; + auto src_x = slice.src_x; + std::unique_ptr tmp; + + if (UNLIKELY(typeless)) + { + const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp; + tmp = std::make_unique(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, (GLenum)dst_image->get_internal_format()); + + src_image = tmp.get(); + src_x = u16(src_x * src_bpp) / dst_bpp; + gl::copy_typeless(src_image, slice.src); + } + if (slice.src_w == slice.dst_w && slice.src_h == slice.dst_h) { - glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0, + glCopyImageSubData(src_image->id(), GL_TEXTURE_2D, 0, src_x, slice.src_y, 0, dst_image->id(), (GLenum)dst_image->get_target(), 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.src_w, slice.src_h, 1); } else { verify(HERE), dst_image->get_target() == gl::texture::target::texture2D; - std::unique_ptr tmp; - auto _dst = dst_image; - auto _blitter = gl::g_hw_blitter; - const areai src_rect = { slice.src_x, slice.src_y, slice.src_x + slice.src_w, slice.src_y + slice.src_h }; + const areai src_rect = { src_x, slice.src_y, src_x + slice.src_w, slice.src_y + slice.src_h }; const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h }; - if (UNLIKELY(slice.src->get_internal_format() != dst_image->get_internal_format())) + auto _dst = dst_image; + if (UNLIKELY(src_image->get_internal_format() != dst_image->get_internal_format())) { + verify(HERE), !typeless; tmp = std::make_unique(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, (GLenum)slice.src->get_internal_format()); _dst = tmp.get(); } - _blitter->scale_image(cmd, slice.src, _dst, + _blitter->scale_image(cmd, src_image, _dst, src_rect, dst_rect, false, false, {}); - if (tmp) + if (_dst != dst_image) { // Data cast comes after scaling glCopyImageSubData(tmp->id(), GL_TEXTURE_2D, 0, slice.dst_x, slice.dst_y, 0, @@ -773,16 +775,16 @@ namespace gl protected: - gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, + gl::texture_view* create_temporary_subresource_view(gl::command_context &cmd, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) override { - return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); + return create_temporary_subresource_impl(cmd, *src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); } - gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, + gl::texture_view* create_temporary_subresource_view(gl::command_context &cmd, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) override { - return create_temporary_subresource_impl(src, (GLenum)src->get_internal_format(), + return create_temporary_subresource_impl(cmd, src, (GLenum)src->get_internal_format(), GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); } @@ -834,7 +836,7 @@ namespace gl const texture_channel_remap_t& remap_vector) override { auto _template = get_template_from_collection_impl(sections_to_copy); - auto result = create_temporary_subresource_impl(_template, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); + auto result = create_temporary_subresource_impl(cmd, _template, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); copy_transfer_regions_impl(cmd, result->image(), sections_to_copy); return result; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index be94397e06..cb9dc18035 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -532,8 +532,7 @@ namespace vk { if (image->current_layout == new_layout) return; - VkImageAspectFlags flags = get_aspect_flags(image->info.format); - change_image_layout(cmd, image->value, image->current_layout, new_layout, { flags, 0, 1, 0, 1 }); + change_image_layout(cmd, image->value, image->current_layout, new_layout, { image->aspect(), 0, 1, 0, 1 }); image->current_layout = new_layout; } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 0c9cf03bf8..a78e596100 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -86,7 +86,7 @@ namespace vk class swap_chain_image; class physical_device; class command_buffer; - struct image; + class image; struct buffer; struct data_heap; class mem_allocator_base; @@ -642,8 +642,203 @@ namespace vk } }; - struct image + class command_pool { + vk::render_device *owner = nullptr; + VkCommandPool pool = nullptr; + + public: + command_pool() {} + ~command_pool() {} + + void create(vk::render_device &dev) + { + owner = &dev; + VkCommandPoolCreateInfo infos = {}; + infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + + CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool)); + } + + void destroy() + { + if (!pool) + return; + + vkDestroyCommandPool((*owner), pool, nullptr); + pool = nullptr; + } + + vk::render_device& get_owner() + { + return (*owner); + } + + operator VkCommandPool() + { + return pool; + } + }; + + class command_buffer + { + private: + bool is_open = false; + bool is_pending = false; + VkFence m_submit_fence = VK_NULL_HANDLE; + + protected: + vk::command_pool *pool = nullptr; + VkCommandBuffer commands = nullptr; + + public: + enum access_type_hint + { + flush_only, //Only to be submitted/opened/closed via command flush + all //Auxiliary, can be submitted/opened/closed at any time + } + access_hint = flush_only; + + enum command_buffer_data_flag : u32 + { + cb_has_occlusion_task = 1, + cb_has_blit_transfer = 2, + cb_has_dma_transfer = 4 + }; + u32 flags = 0; + + public: + command_buffer() {} + ~command_buffer() {} + + void create(vk::command_pool &cmd_pool, bool auto_reset = false) + { + VkCommandBufferAllocateInfo infos = {}; + infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + infos.commandBufferCount = 1; + infos.commandPool = (VkCommandPool)cmd_pool; + infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands)); + + if (auto_reset) + { + VkFenceCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence)); + } + + pool = &cmd_pool; + } + + void destroy() + { + vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands); + + if (m_submit_fence) + { + vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr); + } + } + + vk::command_pool& get_command_pool() const + { + return *pool; + } + + void clear_flags() + { + flags = 0; + } + + void set_flag(command_buffer_data_flag flag) + { + flags |= flag; + } + + operator VkCommandBuffer() const + { + return commands; + } + + bool is_recording() const + { + return is_open; + } + + void begin() + { + if (m_submit_fence && is_pending) + { + wait_for_fence(m_submit_fence); + is_pending = false; + + CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence)); + CHECK_RESULT(vkResetCommandBuffer(commands, 0)); + } + + if (is_open) + return; + + VkCommandBufferInheritanceInfo inheritance_info = {}; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + + VkCommandBufferBeginInfo begin_infos = {}; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + CHECK_RESULT(vkBeginCommandBuffer(commands, &begin_infos)); + is_open = true; + } + + void end() + { + if (!is_open) + { + LOG_ERROR(RSX, "commandbuffer->end was called but commandbuffer is not in a recording state"); + return; + } + + CHECK_RESULT(vkEndCommandBuffer(commands)); + is_open = false; + } + + void submit(VkQueue queue, const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) + { + if (is_open) + { + LOG_ERROR(RSX, "commandbuffer->submit was called whilst the command buffer is in a recording state"); + return; + } + + if (fence == VK_NULL_HANDLE) + { + fence = m_submit_fence; + is_pending = (fence != VK_NULL_HANDLE); + } + + VkSubmitInfo infos = {}; + infos.commandBufferCount = 1; + infos.pCommandBuffers = &commands; + infos.pWaitDstStageMask = &pipeline_stage_flags; + infos.pWaitSemaphores = semaphores.data(); + infos.waitSemaphoreCount = static_cast(semaphores.size()); + infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + + acquire_global_submit_lock(); + CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence)); + release_global_submit_lock(); + + clear_flags(); + } + }; + + class image + { + std::stack m_layout_stack; + VkImageAspectFlags m_storage_aspect = 0; + + public: VkImage value = VK_NULL_HANDLE; VkComponentMapping native_component_map = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; VkImageLayout current_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -692,6 +887,8 @@ namespace vk memory = std::make_shared(m_device, memory_req.size, memory_req.alignment, memory_type_index); CHECK_RESULT(vkBindImageMemory(m_device, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset())); + + m_storage_aspect = get_aspect_flags(format); } // TODO: Ctor that uses a provided memory heap @@ -719,6 +916,40 @@ namespace vk return info.extent.depth; } + VkFormat format() const + { + return info.format; + } + + VkImageAspectFlags aspect() const + { + return m_storage_aspect; + } + + void push_layout(command_buffer& cmd, VkImageLayout layout) + { + m_layout_stack.push(current_layout); + change_image_layout(cmd, this, layout); + } + + void pop_layout(command_buffer& cmd) + { + verify(HERE), !m_layout_stack.empty(); + + auto layout = m_layout_stack.top(); + m_layout_stack.pop(); + change_image_layout(cmd, this, layout); + } + + void change_layout(command_buffer& cmd, VkImageLayout new_layout) + { + if (current_layout == new_layout) + return; + + verify(HERE), m_layout_stack.empty(); + change_image_layout(cmd, this, new_layout); + } + private: VkDevice m_device; }; @@ -851,7 +1082,9 @@ namespace vk remap ); - const auto range = vk::get_image_subresource_range(0, 0, info.arrayLayers, info.mipLevels, get_aspect_flags(info.format) & mask); + const auto range = vk::get_image_subresource_range(0, 0, info.arrayLayers, info.mipLevels, aspect() & mask); + + verify(HERE), range.aspectMask; auto view = std::make_unique(*get_current_renderer(), this, real_mapping, range); auto result = view.get(); @@ -1110,197 +1343,6 @@ namespace vk VkDevice m_device; }; - class command_pool - { - vk::render_device *owner = nullptr; - VkCommandPool pool = nullptr; - - public: - command_pool() {} - ~command_pool() {} - - void create(vk::render_device &dev) - { - owner = &dev; - VkCommandPoolCreateInfo infos = {}; - infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - - CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool)); - } - - void destroy() - { - if (!pool) - return; - - vkDestroyCommandPool((*owner), pool, nullptr); - pool = nullptr; - } - - vk::render_device& get_owner() - { - return (*owner); - } - - operator VkCommandPool() - { - return pool; - } - }; - - class command_buffer - { - private: - bool is_open = false; - bool is_pending = false; - VkFence m_submit_fence = VK_NULL_HANDLE; - - protected: - vk::command_pool *pool = nullptr; - VkCommandBuffer commands = nullptr; - - public: - enum access_type_hint - { - flush_only, //Only to be submitted/opened/closed via command flush - all //Auxiliary, can be submitted/opened/closed at any time - } - access_hint = flush_only; - - enum command_buffer_data_flag : u32 - { - cb_has_occlusion_task = 1, - cb_has_blit_transfer = 2, - cb_has_dma_transfer = 4 - }; - u32 flags = 0; - - public: - command_buffer() {} - ~command_buffer() {} - - void create(vk::command_pool &cmd_pool, bool auto_reset = false) - { - VkCommandBufferAllocateInfo infos = {}; - infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - infos.commandBufferCount = 1; - infos.commandPool = (VkCommandPool)cmd_pool; - infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands)); - - if (auto_reset) - { - VkFenceCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence)); - } - - pool = &cmd_pool; - } - - void destroy() - { - vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands); - - if (m_submit_fence) - { - vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr); - } - } - - vk::command_pool& get_command_pool() const - { - return *pool; - } - - void clear_flags() - { - flags = 0; - } - - void set_flag(command_buffer_data_flag flag) - { - flags |= flag; - } - - operator VkCommandBuffer() const - { - return commands; - } - - bool is_recording() const - { - return is_open; - } - - void begin() - { - if (m_submit_fence && is_pending) - { - wait_for_fence(m_submit_fence); - is_pending = false; - - CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence)); - CHECK_RESULT(vkResetCommandBuffer(commands, 0)); - } - - if (is_open) - return; - - VkCommandBufferInheritanceInfo inheritance_info = {}; - inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; - - VkCommandBufferBeginInfo begin_infos = {}; - begin_infos.pInheritanceInfo = &inheritance_info; - begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - CHECK_RESULT(vkBeginCommandBuffer(commands, &begin_infos)); - is_open = true; - } - - void end() - { - if (!is_open) - { - LOG_ERROR(RSX, "commandbuffer->end was called but commandbuffer is not in a recording state"); - return; - } - - CHECK_RESULT(vkEndCommandBuffer(commands)); - is_open = false; - } - - void submit(VkQueue queue, const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) - { - if (is_open) - { - LOG_ERROR(RSX, "commandbuffer->submit was called whilst the command buffer is in a recording state"); - return; - } - - if (fence == VK_NULL_HANDLE) - { - fence = m_submit_fence; - is_pending = (fence != VK_NULL_HANDLE); - } - - VkSubmitInfo infos = {}; - infos.commandBufferCount = 1; - infos.pCommandBuffers = &commands; - infos.pWaitDstStageMask = &pipeline_stage_flags; - infos.pWaitSemaphores = semaphores.data(); - infos.waitSemaphoreCount = static_cast(semaphores.size()); - infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - - acquire_global_submit_lock(); - CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence)); - release_global_submit_lock(); - - clear_flags(); - } - }; - class swapchain_image_WSI { VkImageView view = nullptr; diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 47d0269de8..335fd28fc6 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -498,25 +498,44 @@ namespace vk void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector& sections_to_transfer) const { + const auto dst_aspect = dst->aspect(); + const auto dst_bpp = vk::get_format_texel_width(dst->format()); + for (const auto §ion : sections_to_transfer) { if (!section.src) continue; - VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst->info.format); - VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format); - VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 }; + const auto src_bpp = vk::get_format_texel_width(section.src->format()); + const bool typeless = section.src->aspect() != dst_aspect || src_bpp != dst_bpp; - if (section.src_w == section.dst_w && section.src_h == section.dst_h && - section.xform == surface_transform::identity) + section.src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + auto src_image = section.src; + if (UNLIKELY(typeless)) { - VkImageLayout old_src_layout = section.src->current_layout; - VkImageCopy copy_rgn; + src_image = vk::get_typeless_helper(dst->info.format, section.src_x + section.src_w, section.src_y + section.src_h); + src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + const u16 convert_w = u16(section.src_w * dst_bpp) / src_bpp; + const areai src_rect = coordi{{ section.src_x, section.src_y }, { convert_w, section.src_h }}; + const areai dst_rect = coordi{{ section.src_x, section.src_y }, { section.src_w, section.src_h }}; + vk::copy_image_typeless(cmd, section.src, src_image, src_rect, dst_rect, 1, section.src->aspect(), dst_aspect); + src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + } + + verify(HERE), src_image->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + // Final aspect mask of the 'final' transfer source + const auto new_src_aspect = src_image->aspect(); + + if (LIKELY(section.src_w == section.dst_w && section.src_h == section.dst_h && section.xform == surface_transform::identity)) + { + VkImageCopy copy_rgn; copy_rgn.srcOffset = { section.src_x, section.src_y, 0 }; copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 }; - copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; + copy_rgn.dstSubresource = { dst_aspect, 0, 0, 1 }; + copy_rgn.srcSubresource = { new_src_aspect, 0, 0, 1 }; copy_rgn.extent = { section.src_w, section.src_h, 1 }; if (dst->info.imageType == VK_IMAGE_TYPE_3D) @@ -528,77 +547,79 @@ namespace vk copy_rgn.dstSubresource.baseArrayLayer = section.dst_z; } - vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); - vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, ©_rgn); - vk::change_image_layout(cmd, section.src, old_src_layout, src_range); + vkCmdCopyImage(cmd, src_image->value, src_image->current_layout, dst->value, dst->current_layout, 1, ©_rgn); } else { verify(HERE), section.dst_z == 0; u16 dst_x = section.dst_x, dst_y = section.dst_y; + auto xform = section.xform; vk::image* _dst; - if (LIKELY(section.src->info.format == dst->info.format)) + if (LIKELY(src_image->info.format == dst->info.format)) { _dst = dst; } else { - _dst = vk::get_typeless_helper(section.src->info.format, dst->width(), dst->height() * 2); - vk::change_image_layout(cmd, _dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, src_range); + verify(HERE), !typeless; + + _dst = vk::get_typeless_helper(src_image->info.format, dst->width(), dst->height() * 2); + _dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } if (section.xform == surface_transform::identity) { - vk::copy_scaled_image(cmd, section.src->value, _dst->value, section.src->current_layout, _dst->current_layout, + vk::copy_scaled_image(cmd, src_image->value, _dst->value, section.src->current_layout, _dst->current_layout, coordi{ { section.src_x, section.src_y }, { section.src_w, section.src_h } }, coordi{ { section.dst_x, section.dst_y }, { section.dst_w, section.dst_h } }, - 1, src_aspect, section.src->info.format == _dst->info.format, - VK_FILTER_NEAREST, section.src->info.format, _dst->info.format); + 1, src_image->aspect(), src_image->info.format == _dst->info.format, + VK_FILTER_NEAREST, src_image->info.format, _dst->info.format); } else if (section.xform == surface_transform::argb_to_bgra) { - VkImageLayout old_src_layout = section.src->current_layout; VkBufferImageCopy copy{}; - copy.imageExtent = { section.src_w, section.src_h, 1 }; copy.imageOffset = { section.src_x, section.src_y, 0 }; - copy.imageSubresource = { src_aspect, 0, 0, 1 }; + copy.imageSubresource = { src_image->aspect(), 0, 0, 1 }; auto scratch_buf = vk::get_scratch_buffer(); - vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); - vkCmdCopyImageToBuffer(cmd, section.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, scratch_buf->value, 1, ©); + vkCmdCopyImageToBuffer(cmd, src_image->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, scratch_buf->value, 1, ©); - const auto length = section.src->width() * section.src->width() * 4; - vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + const auto mem_length = section.src_w * section.src_h * dst_bpp; + vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); auto shuffle_kernel = vk::get_compute_task(); - shuffle_kernel->run(cmd, scratch_buf, length); + shuffle_kernel->run(cmd, scratch_buf, mem_length); - vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); - auto tmp = vk::get_typeless_helper(section.src->info.format, section.dst_x + section.dst_w, section.dst_y + section.dst_h); - vk::change_image_layout(cmd, tmp, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { src_aspect, 0, 1, 0, 1 }); - copy.imageOffset = { 0, 0, 0 }; + auto tmp = vk::get_typeless_helper(src_image->info.format, section.dst_x + section.dst_w, section.dst_y + section.dst_h); + tmp->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + copy.imageOffset = { 0, 0, 0 }; vkCmdCopyBufferToImage(cmd, scratch_buf->value, tmp->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©); - if (UNLIKELY(tmp == _dst)) + dst_x = 0; + dst_y = 0; + + if (section.src_w != section.dst_w || section.src_h != section.dst_h) { - dst_x = 0; - dst_y = section.src_h; + // Optionally scale if needed + if (UNLIKELY(tmp == _dst)) + { + dst_y = section.src_h; + } + + vk::copy_scaled_image(cmd, tmp->value, _dst->value, tmp->current_layout, _dst->current_layout, + areai{ 0, 0, section.src_w, (s32)section.src_h }, + coordi{ { dst_x, dst_y }, { section.dst_w, section.dst_h } }, + 1, new_src_aspect, tmp->info.format == _dst->info.format, + VK_FILTER_NEAREST, tmp->info.format, _dst->info.format); } - - vk::copy_scaled_image(cmd, tmp->value, _dst->value, tmp->current_layout, _dst->current_layout, - areai{ 0, 0, (s32)section.src_w, (s32)section.src_h }, - coordi{ {dst_x, dst_y}, {section.dst_w, section.dst_h} }, - 1, src_aspect, section.src->info.format == _dst->info.format, - VK_FILTER_NEAREST, tmp->info.format, _dst->info.format); - - vk::change_image_layout(cmd, section.src, old_src_layout, src_range); } else { @@ -608,18 +629,19 @@ namespace vk if (UNLIKELY(_dst != dst)) { // Casting comes after the scaling! - VkImageCopy copy_rgn; copy_rgn.srcOffset = { s32(dst_x), s32(dst_y), 0 }; copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 }; - copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; - copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; + copy_rgn.dstSubresource = { dst_aspect, 0, 0, 1 }; + copy_rgn.srcSubresource = { _dst->aspect(), 0, 0, 1 }; copy_rgn.extent = { section.dst_w, section.dst_h, 1 }; - vk::change_image_layout(cmd, _dst, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); + _dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); vkCmdCopyImage(cmd, _dst->value, _dst->current_layout, dst->value, dst->current_layout, 1, ©_rgn); } } + + section.src->pop_layout(cmd); } } @@ -664,24 +686,9 @@ namespace vk std::unique_ptr image; std::unique_ptr view; - VkImageAspectFlags aspect; VkImageCreateFlags image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); - - if (source) - { - aspect = vk::get_aspect_flags(source->info.format); - if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT || - vk::get_format_texel_width(dst_format) != vk::get_format_texel_width(source->info.format)) - { - //HACK! Should use typeless transfer - dst_format = source->info.format; - } - } - else - { - aspect = vk::get_aspect_flags(dst_format); - } + VkImageAspectFlags aspect = vk::get_aspect_flags(dst_format); image.reset(new vk::viewable_image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, image_type, @@ -714,22 +721,17 @@ namespace vk if (copy) { - VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; - VkImageLayout old_src_layout = source->current_layout; + std::vector region = + {{ + source, + surface_transform::identity, + x, y, 0, 0, 0, + w, h, w, h + }}; - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - vk::change_image_layout(cmd, source, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); - - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { (s32)x, (s32)y, 0 }; - copy_rgn.dstOffset = { (s32)0, (s32)0, 0 }; - copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; - copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; - copy_rgn.extent = { w, h, 1 }; - - vkCmdCopyImage(cmd, source->value, source->current_layout, image->value, image->current_layout, 1, ©_rgn); - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); - vk::change_image_layout(cmd, source, old_src_layout, subresource_range); + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + copy_transfer_regions_impl(cmd, image.get(), region); + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } const u32 resource_memory = w * h * 4; //Rough approximate