From a3ededdcf148fcbb05ffa200ea74c762e64652ff Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 1 Mar 2023 23:04:41 +0300 Subject: [PATCH] rsx: Implement deferred buffer allocations for GPU write --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 62 ++++++++++++------------- rpcs3/Emu/RSX/Common/TextureUtils.h | 3 +- rpcs3/Emu/RSX/GL/GLTexture.cpp | 6 ++- rpcs3/Emu/RSX/VK/VKTexture.cpp | 16 ++++--- rpcs3/emucore.vcxproj | 1 + rpcs3/emucore.vcxproj.filters | 3 ++ rpcs3/io_buffer.h | 65 +++++++++++++++++++++++++++ 7 files changed, 116 insertions(+), 40 deletions(-) create mode 100644 rpcs3/io_buffer.h diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 0a70949d25..30d4abbf8c 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -796,7 +796,7 @@ namespace rsx return get_subresources_layout_impl(texture); } - texture_memory_info upload_texture_subresource(std::span dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps) + texture_memory_info upload_texture_subresource(rsx::io_buffer& dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps) { u16 w = src_layout.width_in_block; u16 h = src_layout.height_in_block; @@ -825,13 +825,13 @@ namespace rsx case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: { - copy_decoded_rb_rg_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); + copy_decoded_rb_rg_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); break; } case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: { - copy_decoded_rb_rg_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); + copy_decoded_rb_rg_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); break; } @@ -839,9 +839,9 @@ namespace rsx case CELL_GCM_TEXTURE_R6G5B5: { if (is_swizzled) - copy_rgb655_block_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment)); + copy_rgb655_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment)); else - copy_rgb655_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_rgb655_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } @@ -855,49 +855,49 @@ namespace rsx case CELL_GCM_TEXTURE_R6G5B5: { if (is_swizzled) - convert_16_block_32_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_rgb655_to_bgra8); + convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_rgb655_to_bgra8); else - convert_16_block_32::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb655_to_bgra8); + convert_16_block_32::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb655_to_bgra8); break; } case CELL_GCM_TEXTURE_D1R5G5B5: { if (is_swizzled) - convert_16_block_32_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_d1rgb5_to_bgra8); + convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_d1rgb5_to_bgra8); else - convert_16_block_32::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_d1rgb5_to_bgra8); + convert_16_block_32::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_d1rgb5_to_bgra8); break; } case CELL_GCM_TEXTURE_A1R5G5B5: { if (is_swizzled) - convert_16_block_32_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_a1rgb5_to_bgra8); + convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_a1rgb5_to_bgra8); else - convert_16_block_32::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_a1rgb5_to_bgra8); + convert_16_block_32::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_a1rgb5_to_bgra8); break; } case CELL_GCM_TEXTURE_A4R4G4B4: { if (is_swizzled) - convert_16_block_32_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_argb4_to_bgra8); + convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_argb4_to_bgra8); else - convert_16_block_32::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_argb4_to_bgra8); + convert_16_block_32::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_argb4_to_bgra8); break; } case CELL_GCM_TEXTURE_R5G5B5A1: { if (is_swizzled) - convert_16_block_32_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_rgb5a1_to_bgra8); + convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_rgb5a1_to_bgra8); else - convert_16_block_32::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb5a1_to_bgra8); + convert_16_block_32::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb5a1_to_bgra8); break; } case CELL_GCM_TEXTURE_R5G6B5: { if (is_swizzled) - convert_16_block_32_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_rgb565_to_bgra8); + convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), &convert_rgb565_to_bgra8); else - convert_16_block_32::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb565_to_bgra8); + convert_16_block_32::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb565_to_bgra8); break; } #endif @@ -960,13 +960,13 @@ namespace rsx // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. - copy_unmodified_block_vtc::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); } else if (is_3d && !is_po2 && caps.supports_vtc_decoding) { // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. // We need to compress the 2D-planar DXT input into a VTC output - copy_linear_block_to_vtc::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); } else if (caps.supports_zero_copy) { @@ -975,7 +975,7 @@ namespace rsx } else { - copy_unmodified_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); } break; } @@ -991,13 +991,13 @@ namespace rsx // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. - copy_unmodified_block_vtc::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); } else if (is_3d && !is_po2 && caps.supports_vtc_decoding) { // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. // We need to compress the 2D-planar DXT input into a VTC output - copy_linear_block_to_vtc::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); } else if (caps.supports_zero_copy) { @@ -1006,7 +1006,7 @@ namespace rsx } else { - copy_unmodified_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); } break; } @@ -1021,7 +1021,7 @@ namespace rsx { if (is_swizzled) { - copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); } else if (caps.supports_zero_copy) { @@ -1030,7 +1030,7 @@ namespace rsx } else { - copy_unmodified_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } } else @@ -1064,11 +1064,11 @@ namespace rsx } else if (word_size == 2) { - copy_unmodified_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } else if (word_size == 4) { - copy_unmodified_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } } else @@ -1076,16 +1076,16 @@ namespace rsx if (word_size == 2) { if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); else - copy_unmodified_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } else if (word_size == 4) { if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); else - copy_unmodified_block::copy_mipmap_level(utils::bless(dst_buffer), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), utils::bless>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } } } diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 6061849e4c..dd64418dd3 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -4,6 +4,7 @@ #include #include +#include "io_buffer.h" namespace rsx { @@ -216,7 +217,7 @@ namespace rsx std::vector get_subresources_layout(const rsx::fragment_texture &texture); std::vector get_subresources_layout(const rsx::vertex_texture &texture); - texture_memory_info upload_texture_subresource(std::span dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps); + texture_memory_info upload_texture_subresource(rsx::io_buffer& dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps); u8 get_format_block_size_in_bytes(int format); u8 get_format_block_size_in_texel(int format); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 6e2522532d..2ae9aa4110 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -607,7 +607,8 @@ namespace gl for (const rsx::subresource_layout& layout : input_layouts) { - upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps); + rsx::io_buffer io_buf = staging_buffer; + upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); switch (dst->get_target()) { @@ -704,8 +705,9 @@ namespace gl dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; } + rsx::io_buffer io_buf = dst_buffer; caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096); - auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps); + auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); // Define upload region coord3u region; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index b71f2d24f1..8ba91d9204 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -946,10 +946,6 @@ namespace vk // Calculate estimated memory utilization for this subresource image_linear_size = row_pitch * layout.height_in_block * layout.depth; - // Map with extra padding bytes in case of realignment - offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8); - void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8); - // Only do GPU-side conversion if occupancy is good if (check_caps) { @@ -960,8 +956,16 @@ namespace vk check_caps = false; } - std::span mapped{ static_cast(mapped_buffer), image_linear_size }; - opt = upload_texture_subresource(mapped, layout, format, is_swizzled, caps); + auto buf_allocator = [&]() -> std::tuple + { + // Map with extra padding bytes in case of realignment + offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8); + void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8); + return { mapped_buffer, image_linear_size }; + }; + + auto io_buf = rsx::io_buffer(buf_allocator); + opt = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); upload_heap.unmap(); copy_regions.push_back({}); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index c3d2018dc3..d88f36f6f2 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -591,6 +591,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index c6b12bb721..2dd35f162f 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2308,6 +2308,9 @@ Emu\NP + + Emu\GPU\RSX\Common + diff --git a/rpcs3/io_buffer.h b/rpcs3/io_buffer.h new file mode 100644 index 0000000000..5f4dd32331 --- /dev/null +++ b/rpcs3/io_buffer.h @@ -0,0 +1,65 @@ +#pragma once +#include +#include +#include +#include + +namespace rsx +{ + template + concept SpanLike = requires(T t) + { + { t.data() } -> std::convertible_to; + { t.size_bytes() } -> std::convertible_to; + }; + + template + concept Integral = std::is_integral_v || std::is_same_v; + + class io_buffer + { + void* m_ptr = nullptr; + usz m_size = 0; + + std::function ()> m_allocator = nullptr; + + public: + io_buffer() = default; + + template + io_buffer(T& container) + { + m_ptr = reinterpret_cast(container.data()); + m_size = container.size_bytes(); + } + + io_buffer(std::function ()> allocator) + { + ensure(allocator); + m_allocator = allocator; + } + + template + T* data() + { + if (!m_ptr && m_allocator) + { + std::tie(m_ptr, m_size) = m_allocator(); + } + + return static_cast(m_ptr); + } + + usz size() const + { + return m_size; + } + + template + std::span as_span() + { + const auto bytes = data(); + return { bytes, m_size / sizeof(T) }; + } + }; +}