rsx: Implement deferred buffer allocations for GPU write

This commit is contained in:
kd-11 2023-03-01 23:04:41 +03:00 committed by kd-11
parent 0178b20983
commit a3ededdcf1
7 changed files with 116 additions and 40 deletions

View file

@ -796,7 +796,7 @@ namespace rsx
return get_subresources_layout_impl(texture); return get_subresources_layout_impl(texture);
} }
texture_memory_info upload_texture_subresource(std::span<std::byte> dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps) texture_memory_info upload_texture_subresource(rsx::io_buffer& dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps)
{ {
u16 w = src_layout.width_in_block; u16 w = src_layout.width_in_block;
u16 h = src_layout.height_in_block; u16 h = src_layout.height_in_block;
@ -825,13 +825,13 @@ namespace rsx
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
{ {
copy_decoded_rb_rg_block::copy_mipmap_level<true>(utils::bless<u32>(dst_buffer), utils::bless<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); copy_decoded_rb_rg_block::copy_mipmap_level<true>(dst_buffer.as_span<u32>(), utils::bless<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
break; break;
} }
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
{ {
copy_decoded_rb_rg_block::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); copy_decoded_rb_rg_block::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
break; break;
} }
@ -839,9 +839,9 @@ namespace rsx
case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_R6G5B5:
{ {
if (is_swizzled) if (is_swizzled)
copy_rgb655_block_swizzled::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment)); copy_rgb655_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment));
else else
copy_rgb655_block::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment), src_layout.pitch_in_block); copy_rgb655_block::copy_mipmap_level(dst_buffer.as_span<u16>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u16>(w, caps.alignment), src_layout.pitch_in_block);
break; break;
} }
@ -855,49 +855,49 @@ namespace rsx
case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_R6G5B5:
{ {
if (is_swizzled) if (is_swizzled)
convert_16_block_32_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_rgb655_to_bgra8); convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_rgb655_to_bgra8);
else else
convert_16_block_32::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb655_to_bgra8); convert_16_block_32::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb655_to_bgra8);
break; break;
} }
case CELL_GCM_TEXTURE_D1R5G5B5: case CELL_GCM_TEXTURE_D1R5G5B5:
{ {
if (is_swizzled) if (is_swizzled)
convert_16_block_32_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_d1rgb5_to_bgra8); convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_d1rgb5_to_bgra8);
else else
convert_16_block_32::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_d1rgb5_to_bgra8); convert_16_block_32::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_d1rgb5_to_bgra8);
break; break;
} }
case CELL_GCM_TEXTURE_A1R5G5B5: case CELL_GCM_TEXTURE_A1R5G5B5:
{ {
if (is_swizzled) if (is_swizzled)
convert_16_block_32_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_a1rgb5_to_bgra8); convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_a1rgb5_to_bgra8);
else else
convert_16_block_32::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_a1rgb5_to_bgra8); convert_16_block_32::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_a1rgb5_to_bgra8);
break; break;
} }
case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_A4R4G4B4:
{ {
if (is_swizzled) if (is_swizzled)
convert_16_block_32_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_argb4_to_bgra8); convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_argb4_to_bgra8);
else else
convert_16_block_32::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_argb4_to_bgra8); convert_16_block_32::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_argb4_to_bgra8);
break; break;
} }
case CELL_GCM_TEXTURE_R5G5B5A1: case CELL_GCM_TEXTURE_R5G5B5A1:
{ {
if (is_swizzled) if (is_swizzled)
convert_16_block_32_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_rgb5a1_to_bgra8); convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_rgb5a1_to_bgra8);
else else
convert_16_block_32::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb5a1_to_bgra8); convert_16_block_32::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb5a1_to_bgra8);
break; break;
} }
case CELL_GCM_TEXTURE_R5G6B5: case CELL_GCM_TEXTURE_R5G6B5:
{ {
if (is_swizzled) if (is_swizzled)
convert_16_block_32_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_rgb565_to_bgra8); convert_16_block_32_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), &convert_rgb565_to_bgra8);
else else
convert_16_block_32::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb565_to_bgra8); convert_16_block_32::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u16>>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block, &convert_rgb565_to_bgra8);
break; break;
} }
#endif #endif
@ -960,13 +960,13 @@ namespace rsx
// PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
// This is only supported using Nvidia OpenGL. // This is only supported using Nvidia OpenGL.
// Remove the VTC tiling to support ATI and Vulkan. // Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(utils::bless<u64>(dst_buffer), utils::bless<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block); copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span<u64>(), utils::bless<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
} }
else if (is_3d && !is_po2 && caps.supports_vtc_decoding) else if (is_3d && !is_po2 && caps.supports_vtc_decoding)
{ {
// In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one.
// We need to compress the 2D-planar DXT input into a VTC output // We need to compress the 2D-planar DXT input into a VTC output
copy_linear_block_to_vtc::copy_mipmap_level(utils::bless<u64>(dst_buffer), utils::bless<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block); copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span<u64>(), utils::bless<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
} }
else if (caps.supports_zero_copy) else if (caps.supports_zero_copy)
{ {
@ -975,7 +975,7 @@ namespace rsx
} }
else else
{ {
copy_unmodified_block::copy_mipmap_level(utils::bless<u64>(dst_buffer), utils::bless<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block); copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u64>(), utils::bless<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
} }
break; break;
} }
@ -991,13 +991,13 @@ namespace rsx
// PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // PS3 uses the Nvidia VTC memory layout for compressed 3D textures.
// This is only supported using Nvidia OpenGL. // This is only supported using Nvidia OpenGL.
// Remove the VTC tiling to support ATI and Vulkan. // Remove the VTC tiling to support ATI and Vulkan.
copy_unmodified_block_vtc::copy_mipmap_level(utils::bless<u128>(dst_buffer), utils::bless<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block); copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span<u128>(), utils::bless<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
} }
else if (is_3d && !is_po2 && caps.supports_vtc_decoding) else if (is_3d && !is_po2 && caps.supports_vtc_decoding)
{ {
// In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one.
// We need to compress the 2D-planar DXT input into a VTC output // We need to compress the 2D-planar DXT input into a VTC output
copy_linear_block_to_vtc::copy_mipmap_level(utils::bless<u128>(dst_buffer), utils::bless<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block); copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span<u128>(), utils::bless<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
} }
else if (caps.supports_zero_copy) else if (caps.supports_zero_copy)
{ {
@ -1006,7 +1006,7 @@ namespace rsx
} }
else else
{ {
copy_unmodified_block::copy_mipmap_level(utils::bless<u128>(dst_buffer), utils::bless<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block); copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u128>(), utils::bless<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
} }
break; break;
} }
@ -1021,7 +1021,7 @@ namespace rsx
{ {
if (is_swizzled) if (is_swizzled)
{ {
copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless<u8>(dst_buffer), utils::bless<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u8>(), utils::bless<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
} }
else if (caps.supports_zero_copy) else if (caps.supports_zero_copy)
{ {
@ -1030,7 +1030,7 @@ namespace rsx
} }
else else
{ {
copy_unmodified_block::copy_mipmap_level(utils::bless<u8>(dst_buffer), utils::bless<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u8>(), utils::bless<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
} }
} }
else else
@ -1064,11 +1064,11 @@ namespace rsx
} }
else if (word_size == 2) else if (word_size == 2)
{ {
copy_unmodified_block::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), utils::bless<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
} }
else if (word_size == 4) else if (word_size == 4)
{ {
copy_unmodified_block::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
} }
} }
else else
@ -1076,16 +1076,16 @@ namespace rsx
if (word_size == 2) if (word_size == 2)
{ {
if (is_swizzled) if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), utils::bless<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else else
copy_unmodified_block::copy_mipmap_level(utils::bless<u16>(dst_buffer), utils::bless<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), utils::bless<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
} }
else if (word_size == 4) else if (word_size == 4)
{ {
if (is_swizzled) if (is_swizzled)
copy_unmodified_block_swizzled::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
else else
copy_unmodified_block::copy_mipmap_level(utils::bless<u32>(dst_buffer), utils::bless<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), utils::bless<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
} }
} }
} }

View file

@ -4,6 +4,7 @@
#include <span> #include <span>
#include <vector> #include <vector>
#include "io_buffer.h"
namespace rsx namespace rsx
{ {
@ -216,7 +217,7 @@ namespace rsx
std::vector<subresource_layout> get_subresources_layout(const rsx::fragment_texture &texture); std::vector<subresource_layout> get_subresources_layout(const rsx::fragment_texture &texture);
std::vector<subresource_layout> get_subresources_layout(const rsx::vertex_texture &texture); std::vector<subresource_layout> get_subresources_layout(const rsx::vertex_texture &texture);
texture_memory_info upload_texture_subresource(std::span<std::byte> dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps); texture_memory_info upload_texture_subresource(rsx::io_buffer& dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps);
u8 get_format_block_size_in_bytes(int format); u8 get_format_block_size_in_bytes(int format);
u8 get_format_block_size_in_texel(int format); u8 get_format_block_size_in_texel(int format);

View file

@ -607,7 +607,8 @@ namespace gl
for (const rsx::subresource_layout& layout : input_layouts) for (const rsx::subresource_layout& layout : input_layouts)
{ {
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps); rsx::io_buffer io_buf = staging_buffer;
upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
switch (dst->get_target()) switch (dst->get_target())
{ {
@ -704,8 +705,9 @@ namespace gl
dst_buffer = { reinterpret_cast<std::byte*>(upload_scratch_mem.first), image_linear_size }; dst_buffer = { reinterpret_cast<std::byte*>(upload_scratch_mem.first), image_linear_size };
} }
rsx::io_buffer io_buf = dst_buffer;
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096); caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096);
auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps); auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
// Define upload region // Define upload region
coord3u region; coord3u region;

View file

@ -946,10 +946,6 @@ namespace vk
// Calculate estimated memory utilization for this subresource // Calculate estimated memory utilization for this subresource
image_linear_size = row_pitch * layout.height_in_block * layout.depth; image_linear_size = row_pitch * layout.height_in_block * layout.depth;
// Map with extra padding bytes in case of realignment
offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8);
void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8);
// Only do GPU-side conversion if occupancy is good // Only do GPU-side conversion if occupancy is good
if (check_caps) if (check_caps)
{ {
@ -960,8 +956,16 @@ namespace vk
check_caps = false; check_caps = false;
} }
std::span<std::byte> mapped{ static_cast<std::byte*>(mapped_buffer), image_linear_size }; auto buf_allocator = [&]() -> std::tuple<void*, usz>
opt = upload_texture_subresource(mapped, layout, format, is_swizzled, caps); {
// Map with extra padding bytes in case of realignment
offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8);
void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8);
return { mapped_buffer, image_linear_size };
};
auto io_buf = rsx::io_buffer(buf_allocator);
opt = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
upload_heap.unmap(); upload_heap.unmap();
copy_regions.push_back({}); copy_regions.push_back({});

View file

@ -591,6 +591,7 @@
<ClInclude Include="Emu\vfs_config.h" /> <ClInclude Include="Emu\vfs_config.h" />
<ClInclude Include="Loader\disc.h" /> <ClInclude Include="Loader\disc.h" />
<ClInclude Include="Loader\mself.hpp" /> <ClInclude Include="Loader\mself.hpp" />
<ClInclude Include="io_buffer.h" />
<ClInclude Include="util\atomic.hpp" /> <ClInclude Include="util\atomic.hpp" />
<ClInclude Include="util\image_sink.h" /> <ClInclude Include="util\image_sink.h" />
<ClInclude Include="util\video_provider.h" /> <ClInclude Include="util\video_provider.h" />

View file

@ -2308,6 +2308,9 @@
<ClInclude Include="Emu\NP\upnp_config.h"> <ClInclude Include="Emu\NP\upnp_config.h">
<Filter>Emu\NP</Filter> <Filter>Emu\NP</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="io_buffer.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl"> <None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">

65
rpcs3/io_buffer.h Normal file
View file

@ -0,0 +1,65 @@
#pragma once
#include <util/types.hpp>
#include <concepts>
#include <functional>
#include <span>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
namespace rsx
{
// Satisfied by any type that exposes contiguous storage the way std::span does:
//  - data() yields something convertible to a mutable void*
//  - size_bytes() yields something convertible to usz
template <typename T>
concept SpanLike = requires(T container)
{
	{ container.data() } -> std::convertible_to<void*>;
	{ container.size_bytes() } -> std::convertible_to<usz>;
};
// Element types accepted by io_buffer accessors: every built-in integral type, plus u128.
// u128 is listed explicitly instead of relying on std::is_integral reporting it.
template <typename T>
concept Integral = std::disjunction_v<std::is_integral<T>, std::is_same<T, u128>>;
// Non-owning handle to a destination buffer whose storage may be supplied lazily.
//
// Two construction modes are supported:
//  - Wrapping: built from a span-like container; pointer and byte size are captured immediately.
//  - Deferred: built from an allocator callback; the callback runs the first time data()
//    (or as_span(), which calls data()) is used and returns the pointer and byte size.
//    If the buffer is never accessed, the callback is never invoked.
//
// This class has no destructor logic: it never frees or unmaps the memory it points at.
class io_buffer
{
	void* m_ptr = nullptr;
	usz m_size = 0;
	std::function<std::tuple<void*, usz> ()> m_allocator = nullptr;

public:
	io_buffer() = default;

	// Wraps existing storage. Intentionally non-explicit so callers can write
	// `rsx::io_buffer buf = some_span;`.
	template <SpanLike T>
	io_buffer(T& container)
		: m_ptr(static_cast<void*>(container.data()))
		, m_size(container.size_bytes())
	{
	}

	// Defers storage acquisition to `allocator`, which must be callable (non-empty).
	// The callback returns { pointer, size in bytes }.
	io_buffer(std::function<std::tuple<void*, usz> ()> allocator)
	{
		// Validate before moving; a moved-from std::function must not be inspected.
		ensure(allocator);
		m_allocator = std::move(allocator);
	}

	// Returns the storage reinterpreted as T*, invoking the deferred allocator first
	// if no pointer has been obtained yet. Returns nullptr for a default-constructed buffer.
	template <Integral T>
	T* data()
	{
		if (!m_ptr && m_allocator)
		{
			std::tie(m_ptr, m_size) = m_allocator();
		}

		return static_cast<T*>(m_ptr);
	}

	// Size in bytes as currently known. This does NOT trigger the deferred allocator,
	// so a deferred buffer reports 0 until data() or as_span() has been called.
	usz size() const
	{
		return m_size;
	}

	// Returns the storage as a span of T. Calls data() first so that m_size is populated
	// for deferred buffers; any trailing bytes that do not fill a whole T are excluded.
	template <Integral T>
	std::span<T> as_span()
	{
		T* const first = data<T>();
		return { first, m_size / sizeof(T) };
	}
};
}