From 3d935b64f2ba0a749befbc32f9a26c022b244366 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 1 Jul 2017 00:24:41 +0300 Subject: [PATCH] rsx/gl/vk: Enable contents transfer when a new framebuffer is created and not cleared --- rpcs3/Emu/RSX/Common/surface_store.h | 11 +++- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 2 + rpcs3/Emu/RSX/GL/GLGSRender.cpp | 56 +++++++++++++++-- rpcs3/Emu/RSX/GL/GLRenderTargets.h | 19 ++++-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 68 ++++++++++++++++++++- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 29 ++++++++- 6 files changed, 166 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 154464ab18..1ff4703a01 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -93,7 +93,7 @@ namespace rsx auto It = m_render_targets_storage.find(address); // TODO: Fix corner cases // This doesn't take overlapping surface(s) into account. - // Invalidated surface(s) should also copy their content to the new resources. + surface_type old_surface = nullptr; if (It != m_render_targets_storage.end()) { surface_storage_type &rtt = It->second; @@ -102,11 +102,13 @@ namespace rsx Traits::prepare_rtt_for_drawing(command_list, Traits::get(rtt)); return Traits::get(rtt); } + + old_surface = Traits::get(rtt); invalidated_resources.push_back(std::move(rtt)); m_render_targets_storage.erase(address); } - m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, std::forward<Args>(extra_params)...); + m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, old_surface, std::forward<Args>(extra_params)...); return Traits::get(m_render_targets_storage[address]); } @@ -117,6 +119,7 @@ namespace rsx surface_depth_format depth_format, size_t width, size_t height, Args&&... 
extra_params) { + surface_type old_surface = nullptr; auto It = m_depth_stencil_storage.find(address); if (It != m_depth_stencil_storage.end()) { @@ -126,11 +129,13 @@ namespace rsx Traits::prepare_ds_for_drawing(command_list, Traits::get(ds)); return Traits::get(ds); } + + old_surface = Traits::get(ds); invalidated_resources.push_back(std::move(ds)); m_depth_stencil_storage.erase(address); } - m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, std::forward<Args>(extra_params)...); + m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, old_surface, std::forward<Args>(extra_params)...); return Traits::get(m_depth_stencil_storage[address]); } public: diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 363d245005..309b588eaa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -25,6 +25,7 @@ struct render_target_traits ComPtr create_new_surface( u32 address, surface_color_format color_format, size_t width, size_t height, + ID3D12Resource* /*old*/, gsl::not_null device, const std::array &clear_color, float, u8) { DXGI_FORMAT dxgi_format = get_color_surface_format(color_format); @@ -73,6 +74,7 @@ struct render_target_traits ComPtr create_new_surface( u32 address, surface_depth_format surfaceDepthFormat, size_t width, size_t height, + ID3D12Resource* /*old*/, gsl::not_null device, const std::array& , float clear_depth, u8 clear_stencil) { D3D12_CLEAR_VALUE clear_depth_value = {}; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 09159e5e95..c79172f94c 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -349,6 +349,21 @@ void GLGSRender::end() //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared + auto copy_rtt_contents = 
[](gl::render_target *surface) + { + //Copy data from old contents onto this one + //1. Clip a rectangular region defining the data + //2. Perform a GPU blit + u16 parent_w = surface->old_contents->width(); + u16 parent_h = surface->old_contents->height(); + u16 copy_w, copy_h; + + std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); + glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1); + surface->set_cleared(); + surface->old_contents = nullptr; + }; + gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); if (ds && !ds->cleared()) { @@ -360,13 +375,33 @@ void GLGSRender::end() glClearStencil(255); glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - + + if (g_cfg.video.strict_rendering_mode) + { + //Copy previous data if any + if (ds->old_contents != nullptr) + copy_rtt_contents(ds); + } + glDepthMask(rsx::method_registers.depth_write_enabled()); glEnable(GL_SCISSOR_TEST); ds->set_cleared(); } + if (g_cfg.video.strict_rendering_mode) + { + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (std::get<0>(rtt) != 0) + { + auto surface = std::get<1>(rtt); + if (!surface->cleared() && surface->old_contents != nullptr) + copy_rtt_contents(surface); + } + } + } + std::chrono::time_point<steady_clock> textures_start = steady_clock::now(); //Setup textures @@ -710,7 +745,10 @@ void GLGSRender::clear_surface(u32 arg) gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); if (ds && !ds->cleared()) + { ds->set_cleared(); + ds->old_contents = nullptr; + } } if (surface_depth_format == rsx::surface_depth_format::z24s8 && (arg & 0x2)) @@ -734,6 +772,15 @@ void GLGSRender::clear_surface(u32 arg) glClearColor(clear_r / 255.f, clear_g / 255.f, clear_b / 255.f, clear_a / 255.f); mask |= GLenum(gl::buffers::color); + + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (std::get<0>(rtt) != 
0) + { + std::get<1>(rtt)->set_cleared(true); + std::get<1>(rtt)->old_contents = nullptr; + } + } } glClear(mask); @@ -904,13 +951,10 @@ void GLGSRender::flip(int buffer) __glcheck m_flip_fbo.color = *render_target_texture; __glcheck m_flip_fbo.read_buffer(m_flip_fbo.color); } - else if (draw_fbo) - { - //HACK! it's here, because textures cache isn't implemented correctly! - flip_fbo = &draw_fbo; - } else { + LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU"); + if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height }) { m_flip_tex_color.recreate(gl::texture::target::texture2D); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 885a2759e4..123943bfc0 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -62,6 +62,7 @@ namespace gl texture::internal_format compatible_internal_format = texture::internal_format::rgba8; public: + render_target *old_contents = nullptr; render_target() {} @@ -163,7 +164,8 @@ struct gl_render_target_traits u32 /*address*/, rsx::surface_color_format surface_color_format, size_t width, - size_t height + size_t height, + gl::render_target* old_surface ) { std::unique_ptr<gl::render_target> result(new gl::render_target()); @@ -187,6 +189,9 @@ struct gl_render_target_traits __glcheck result->pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1); __glcheck result->pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1); + if (old_surface != nullptr && old_surface->get_compatible_internal_format() == internal_fmt) + result->old_contents = old_surface; + return result; } @@ -195,7 +200,8 @@ struct gl_render_target_traits u32 /*address*/, rsx::surface_depth_format surface_depth_format, size_t width, - size_t height + size_t height, + gl::render_target* old_surface ) { std::unique_ptr<gl::render_target> result(new gl::render_target()); @@ -222,6 +228,9 @@ struct gl_render_target_traits 
result->set_native_pitch(native_pitch); result->set_compatible_format(format.internal_format); + if (old_surface != nullptr && old_surface->get_compatible_internal_format() == format.internal_format) + result->old_contents = old_surface; + return result; } @@ -235,10 +244,10 @@ struct gl_render_target_traits static void invalidate_depth_surface_contents(void *, gl::render_target *ds) { ds->set_cleared(false); } static - bool rtt_has_format_width_height(const std::unique_ptr<gl::render_target> &rtt, rsx::surface_color_format, size_t width, size_t height) + bool rtt_has_format_width_height(const std::unique_ptr<gl::render_target> &rtt, rsx::surface_color_format format, size_t width, size_t height) { - // TODO: check format - return rtt->width() == width && rtt->height() == height; + auto internal_fmt = rsx::internals::sized_internal_format(format); + return rtt->get_compatible_internal_format() == internal_fmt && rtt->width() == width && rtt->height() == height; } static diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d653ff9fd2..56d162f282 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -955,7 +955,58 @@ void VKGSRender::end() return; } - close_render_pass(); //Texture upload stuff conflicts active RPs + close_render_pass(); //Texture upload stuff conflicts active RPs + + if (g_cfg.video.strict_rendering_mode) + { + auto copy_rtt_contents = [&](vk::render_target* surface) + { + const VkImageAspectFlags aspect = surface->attachment_aspect_flag; + + const u16 parent_w = surface->old_contents->width(); + const u16 parent_h = surface->old_contents->height(); + u16 copy_w, copy_h; + + std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); + + VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; + VkImageLayout old_layout = surface->current_layout; + + vk::change_image_layout(*m_current_command_buffer, surface, 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); + + VkImageCopy copy_rgn; + copy_rgn.srcOffset = { 0, 0, 0 }; + copy_rgn.dstOffset = { 0, 0, 0 }; + copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; + copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; + copy_rgn.extent = { copy_w, copy_h, 1 }; + + vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn); + vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range); + + surface->dirty = false; + surface->old_contents = nullptr; + }; + + //Prepare surfaces if needed + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (std::get<0>(rtt) != 0) + { + auto surface = std::get<1>(rtt); + + if (surface->dirty && surface->old_contents != nullptr) + copy_rtt_contents(surface); + } + } + + if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) + { + if (ds->dirty && ds->old_contents != nullptr) + copy_rtt_contents(ds); + } + } std::chrono::time_point<steady_clock> vertex_start0 = steady_clock::now(); auto upload_info = upload_vertex_data(); @@ -1072,6 +1123,7 @@ void VKGSRender::end() VkClearRect clear_rect = { 0, 0, m_framebuffer_to_clean.back()->width(), m_framebuffer_to_clean.back()->height(), 0, 1 }; VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value }; vkCmdClearAttachments(*m_current_command_buffer, 1, &clear_desc, 1, &clear_rect); + ds->dirty = false; } } @@ -1245,6 +1297,15 @@ void VKGSRender::clear_surface(u32 mask) clear_descriptors.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, (uint32_t)index, color_clear_values }); clear_regions.push_back(region); } + + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (std::get<0>(rtt) != 0) + { + std::get<1>(rtt)->dirty = false; + std::get<1>(rtt)->old_contents = nullptr; + } + } } if 
(mask & 0x3) @@ -1259,7 +1320,10 @@ void VKGSRender::clear_surface(u32 mask) if (mask & 0x3) { if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0) + { std::get<1>(m_rtts.m_bound_depth_stencil)->dirty = false; + std::get<1>(m_rtts.m_bound_depth_stencil)->old_contents = nullptr; + } } } @@ -1919,8 +1983,6 @@ void VKGSRender::flip(int buffer) u32 buffer_height = gcm_buffers[buffer].height; u32 buffer_pitch = gcm_buffers[buffer].pitch; - rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); - areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); coordi aspect_ratio; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 09d07fa3b9..1532636130 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -15,6 +15,8 @@ namespace vk u16 native_pitch = 0; VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; + render_target *old_contents = nullptr; //Data occupying the memory location that this surface is replacing + render_target(vk::render_device &dev, uint32_t memory_type_index, uint32_t access_flags, @@ -43,7 +45,12 @@ namespace rsx using command_list_type = vk::command_buffer*; using download_buffer_object = void*; - static std::unique_ptr<vk::render_target> create_new_surface(u32, surface_color_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &, const vk::memory_type_mapping &mem_mapping) + static std::unique_ptr<vk::render_target> create_new_surface( + u32 /*address*/, + surface_color_format format, + size_t width, size_t height, + vk::render_target* old_surface, + vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &, const vk::memory_type_mapping &mem_mapping) { auto fmt = vk::get_compatible_surface_format(format); VkFormat requested_format = fmt.first; @@ -74,10 +81,22 @@ namespace rsx rtt->native_component_map = fmt.second; 
rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format); + + if (old_surface != nullptr && old_surface->info.format == requested_format) + { + rtt->old_contents = old_surface; + rtt->dirty = true; + } + return rtt; } - static std::unique_ptr<vk::render_target> create_new_surface(u32, surface_depth_format format, size_t width, size_t height, vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &support, const vk::memory_type_mapping &mem_mapping) + static std::unique_ptr<vk::render_target> create_new_surface( + u32 /* address */, + surface_depth_format format, + size_t width, size_t height, + vk::render_target* old_surface, + vk::render_device &device, vk::command_buffer *cmd, const vk::gpu_formats_support &support, const vk::memory_type_mapping &mem_mapping) { VkFormat requested_format = vk::get_compatible_depth_surface_format(support, format); VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT); @@ -115,6 +134,12 @@ namespace rsx ds->attachment_aspect_flag = range.aspectMask; + if (old_surface != nullptr && old_surface->info.format == requested_format) + { + ds->old_contents = old_surface; + ds->dirty = true; + } + return ds; }