mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 00:11:24 +12:00
rsx/vk: DMA stuff
This commit is contained in:
parent
b96864c7e6
commit
7766076042
7 changed files with 195 additions and 59 deletions
|
@ -296,6 +296,55 @@ struct copy_rgb655_block_swizzled
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
/**
|
||||||
|
* Generates copy instructions required to build the texture GPU side without actually copying anything.
|
||||||
|
* Returns a set of addresses and data lengths to use. This can be used to generate a GPU task to avoid CPU doing the heavy lifting.
|
||||||
|
*/
|
||||||
|
std::vector<rsx::memory_transfer_cmd>
|
||||||
|
build_transfer_cmds(const void* src, u16 block_size_in_bytes, u16 width_in_block, u16 row_count, u16 depth, u8 border, u32 dst_pitch_in_block, u32 src_pitch_in_block)
|
||||||
|
{
|
||||||
|
std::vector<rsx::memory_transfer_cmd> result;
|
||||||
|
|
||||||
|
if (src_pitch_in_block == dst_pitch_in_block && !border)
|
||||||
|
{
|
||||||
|
// Fast copy
|
||||||
|
rsx::memory_transfer_cmd cmd;
|
||||||
|
cmd.src = src;
|
||||||
|
cmd.dst = nullptr;
|
||||||
|
cmd.length = src_pitch_in_block * block_size_in_bytes * row_count * depth;
|
||||||
|
return { cmd };
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 width_in_bytes = width_in_block * block_size_in_bytes;
|
||||||
|
const u32 src_pitch_in_bytes = src_pitch_in_block * block_size_in_bytes;
|
||||||
|
const u32 dst_pitch_in_bytes = dst_pitch_in_block * block_size_in_bytes;
|
||||||
|
|
||||||
|
const u32 h_porch = border * block_size_in_bytes;
|
||||||
|
const u32 v_porch = src_pitch_in_bytes * border;
|
||||||
|
|
||||||
|
auto src_ = static_cast<const char*>(src) + h_porch;
|
||||||
|
auto dst_ = static_cast<const char*>(nullptr);
|
||||||
|
|
||||||
|
for (int layer = 0; layer < depth; ++layer)
|
||||||
|
{
|
||||||
|
// Front
|
||||||
|
src_ += v_porch;
|
||||||
|
|
||||||
|
for (int row = 0; row < row_count; ++row)
|
||||||
|
{
|
||||||
|
rsx::memory_transfer_cmd cmd{ dst_, src_, width_in_bytes };
|
||||||
|
result.push_back(cmd);
|
||||||
|
src_ += src_pitch_in_bytes;
|
||||||
|
dst_ += dst_pitch_in_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Back
|
||||||
|
src_ += v_porch;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Texture upload template.
|
* Texture upload template.
|
||||||
*
|
*
|
||||||
|
@ -533,7 +582,7 @@ namespace rsx
|
||||||
return get_subresources_layout_impl(texture);
|
return get_subresources_layout_impl(texture);
|
||||||
}
|
}
|
||||||
|
|
||||||
texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps)
|
texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps)
|
||||||
{
|
{
|
||||||
u16 w = src_layout.width_in_block;
|
u16 w = src_layout.width_in_block;
|
||||||
u16 h = src_layout.height_in_block;
|
u16 h = src_layout.height_in_block;
|
||||||
|
@ -644,6 +693,11 @@ namespace rsx
|
||||||
// Remove the VTC tiling to support ATI and Vulkan.
|
// Remove the VTC tiling to support ATI and Vulkan.
|
||||||
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
|
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), w, h, depth, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
|
||||||
}
|
}
|
||||||
|
else if (caps.supports_zero_copy)
|
||||||
|
{
|
||||||
|
result.require_upload = true;
|
||||||
|
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 8, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u64>(dst_buffer), as_const_span<const u64>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u64>(w, caps.alignment), src_layout.pitch_in_block);
|
||||||
|
@ -661,6 +715,11 @@ namespace rsx
|
||||||
// Remove the VTC tiling to support ATI and Vulkan.
|
// Remove the VTC tiling to support ATI and Vulkan.
|
||||||
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
|
copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), w, h, depth, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
|
||||||
}
|
}
|
||||||
|
else if (caps.supports_zero_copy)
|
||||||
|
{
|
||||||
|
result.require_upload = true;
|
||||||
|
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 16, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u128>(dst_buffer), as_const_span<const u128>(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block<u128>(w, caps.alignment), src_layout.pitch_in_block);
|
||||||
|
@ -676,57 +735,73 @@ namespace rsx
|
||||||
{
|
{
|
||||||
if (word_size == 1)
|
if (word_size == 1)
|
||||||
{
|
{
|
||||||
if (is_swizzled)
|
if (caps.supports_zero_copy)
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
|
||||||
else
|
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
else if (caps.supports_byteswap)
|
|
||||||
{
|
|
||||||
result.require_swap = true;
|
|
||||||
result.element_size = word_size;
|
|
||||||
result.block_length = words_per_block;
|
|
||||||
|
|
||||||
if (word_size == 2)
|
|
||||||
{
|
{
|
||||||
if (is_swizzled)
|
result.require_upload = true;
|
||||||
{
|
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
if (((word_size * words_per_block) & 3) == 0 && caps.supports_hw_deswizzle)
|
|
||||||
{
|
|
||||||
result.require_deswizzle = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_swizzled && !result.require_deswizzle)
|
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
|
||||||
else
|
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
}
|
||||||
else if (word_size == 4)
|
else if (is_swizzled)
|
||||||
{
|
{
|
||||||
result.require_deswizzle = (is_swizzled && caps.supports_hw_deswizzle);
|
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
|
}
|
||||||
if (is_swizzled && !caps.supports_hw_deswizzle)
|
else
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
{
|
||||||
else
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (word_size == 2)
|
bool require_cpu_swizzle = !caps.supports_hw_deswizzle;
|
||||||
|
bool require_cpu_byteswap = !caps.supports_byteswap;
|
||||||
|
|
||||||
|
if (is_swizzled && caps.supports_hw_deswizzle)
|
||||||
{
|
{
|
||||||
if (is_swizzled)
|
if (word_size == 4 || (((word_size * words_per_block) & 3) == 0))
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
{
|
||||||
|
result.require_deswizzle = true;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
{
|
||||||
|
require_cpu_swizzle = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (word_size == 4)
|
|
||||||
|
if (!require_cpu_byteswap && !require_cpu_swizzle)
|
||||||
{
|
{
|
||||||
if (is_swizzled)
|
result.require_deswizzle = is_swizzled;
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
result.require_swap = true;
|
||||||
else
|
result.element_size = word_size;
|
||||||
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
|
if (caps.supports_zero_copy)
|
||||||
|
{
|
||||||
|
result.require_upload = true;
|
||||||
|
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
|
else if (word_size == 2)
|
||||||
|
{
|
||||||
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const u16>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
|
else if (word_size == 4)
|
||||||
|
{
|
||||||
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (word_size == 2)
|
||||||
|
{
|
||||||
|
if (is_swizzled)
|
||||||
|
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
|
else
|
||||||
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u16>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
|
else if (word_size == 4)
|
||||||
|
{
|
||||||
|
if (is_swizzled)
|
||||||
|
copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
|
else
|
||||||
|
copy_unmodified_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u32>>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,12 +112,22 @@ namespace rsx
|
||||||
u32 pitch_in_block;
|
u32 pitch_in_block;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct memory_transfer_cmd
|
||||||
|
{
|
||||||
|
const void* dst;
|
||||||
|
const void* src;
|
||||||
|
u32 length;
|
||||||
|
};
|
||||||
|
|
||||||
struct texture_memory_info
|
struct texture_memory_info
|
||||||
{
|
{
|
||||||
int element_size;
|
int element_size;
|
||||||
int block_length;
|
int block_length;
|
||||||
bool require_swap;
|
bool require_swap;
|
||||||
bool require_deswizzle;
|
bool require_deswizzle;
|
||||||
|
bool require_upload;
|
||||||
|
|
||||||
|
std::vector<memory_transfer_cmd> deferred_cmds;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct texture_uploader_capabilities
|
struct texture_uploader_capabilities
|
||||||
|
@ -125,6 +135,7 @@ namespace rsx
|
||||||
bool supports_byteswap;
|
bool supports_byteswap;
|
||||||
bool supports_vtc_decoding;
|
bool supports_vtc_decoding;
|
||||||
bool supports_hw_deswizzle;
|
bool supports_hw_deswizzle;
|
||||||
|
bool supports_zero_copy;
|
||||||
usz alignment;
|
usz alignment;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -143,7 +154,7 @@ namespace rsx
|
||||||
std::vector<subresource_layout> get_subresources_layout(const rsx::fragment_texture &texture);
|
std::vector<subresource_layout> get_subresources_layout(const rsx::fragment_texture &texture);
|
||||||
std::vector<subresource_layout> get_subresources_layout(const rsx::vertex_texture &texture);
|
std::vector<subresource_layout> get_subresources_layout(const rsx::vertex_texture &texture);
|
||||||
|
|
||||||
texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps);
|
texture_memory_info upload_texture_subresource(gsl::span<std::byte> dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, texture_uploader_capabilities& caps);
|
||||||
|
|
||||||
u8 get_format_block_size_in_bytes(int format);
|
u8 get_format_block_size_in_bytes(int format);
|
||||||
u8 get_format_block_size_in_texel(int format);
|
u8 get_format_block_size_in_texel(int format);
|
||||||
|
|
|
@ -645,7 +645,7 @@ namespace gl
|
||||||
const std::vector<rsx::subresource_layout> &input_layouts,
|
const std::vector<rsx::subresource_layout> &input_layouts,
|
||||||
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
|
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
|
||||||
{
|
{
|
||||||
rsx::texture_uploader_capabilities caps{ true, false, false, 4 };
|
rsx::texture_uploader_capabilities caps{ true, false, false, false, 4 };
|
||||||
|
|
||||||
pixel_unpack_settings unpack_settings;
|
pixel_unpack_settings unpack_settings;
|
||||||
unpack_settings.row_length(0).alignment(4);
|
unpack_settings.row_length(0).alignment(4);
|
||||||
|
|
|
@ -178,7 +178,7 @@ namespace vk
|
||||||
return inheritance_info.parent->head();
|
return inheritance_info.parent->head();
|
||||||
}
|
}
|
||||||
|
|
||||||
void dma_block::set_parent(command_buffer& cmd, dma_block* parent)
|
void dma_block::set_parent(const command_buffer& cmd, dma_block* parent)
|
||||||
{
|
{
|
||||||
ensure(parent);
|
ensure(parent);
|
||||||
if (inheritance_info.parent == parent)
|
if (inheritance_info.parent == parent)
|
||||||
|
@ -206,7 +206,7 @@ namespace vk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void dma_block::extend(command_buffer& cmd, const render_device &dev, usz new_size)
|
void dma_block::extend(const command_buffer& cmd, const render_device &dev, usz new_size)
|
||||||
{
|
{
|
||||||
ensure(allocated_memory);
|
ensure(allocated_memory);
|
||||||
if (new_size <= allocated_memory->size())
|
if (new_size <= allocated_memory->size())
|
||||||
|
@ -244,7 +244,7 @@ namespace vk
|
||||||
return (allocated_memory) ? allocated_memory->size() : 0;
|
return (allocated_memory) ? allocated_memory->size() : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<u32, vk::buffer*> map_dma(command_buffer& cmd, u32 local_address, u32 length)
|
std::pair<u32, vk::buffer*> map_dma(const command_buffer& cmd, u32 local_address, u32 length)
|
||||||
{
|
{
|
||||||
const auto map_range = utils::address_range::start_length(local_address, length);
|
const auto map_range = utils::address_range::start_length(local_address, length);
|
||||||
const auto first_block = (local_address & s_dma_block_mask);
|
const auto first_block = (local_address & s_dma_block_mask);
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
namespace vk
|
namespace vk
|
||||||
{
|
{
|
||||||
std::pair<u32, vk::buffer*> map_dma(command_buffer& cmd, u32 local_address, u32 length);
|
std::pair<u32, vk::buffer*> map_dma(const command_buffer& cmd, u32 local_address, u32 length);
|
||||||
void load_dma(u32 local_address, u32 length);
|
void load_dma(u32 local_address, u32 length);
|
||||||
void flush_dma(u32 local_address, u32 length);
|
void flush_dma(u32 local_address, u32 length);
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ namespace vk
|
||||||
|
|
||||||
dma_block* head();
|
dma_block* head();
|
||||||
const dma_block* head() const;
|
const dma_block* head() const;
|
||||||
void set_parent(command_buffer& cmd, dma_block* parent);
|
void set_parent(const command_buffer& cmd, dma_block* parent);
|
||||||
void extend(command_buffer& cmd, const render_device& dev, usz new_size);
|
void extend(const command_buffer& cmd, const render_device& dev, usz new_size);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,7 +68,7 @@ namespace vk
|
||||||
* Then copy all layers into dst_image.
|
* Then copy all layers into dst_image.
|
||||||
* dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag.
|
* dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag.
|
||||||
*/
|
*/
|
||||||
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
|
void copy_mipmaped_image_using_buffer(const vk::command_buffer& cmd, vk::image* dst_image,
|
||||||
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
|
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
|
||||||
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align = 0);
|
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align = 0);
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
#include "VKHelpers.h"
|
#include "VKHelpers.h"
|
||||||
#include "VKFormats.h"
|
#include "VKFormats.h"
|
||||||
#include "VKCompute.h"
|
#include "VKCompute.h"
|
||||||
|
#include "VKDMA.h"
|
||||||
#include "VKRenderPass.h"
|
#include "VKRenderPass.h"
|
||||||
#include "VKRenderTargets.h"
|
#include "VKRenderTargets.h"
|
||||||
|
|
||||||
|
@ -800,7 +801,7 @@ namespace vk
|
||||||
ensure(dst_offset <= scratch_buf->size());
|
ensure(dst_offset <= scratch_buf->size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
|
void copy_mipmaped_image_using_buffer(const vk::command_buffer& cmd, vk::image* dst_image,
|
||||||
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
|
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
|
||||||
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align)
|
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align)
|
||||||
{
|
{
|
||||||
|
@ -808,7 +809,7 @@ namespace vk
|
||||||
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
|
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
|
||||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||||
|
|
||||||
rsx::texture_uploader_capabilities caps{ true, false, true, heap_align };
|
rsx::texture_uploader_capabilities caps{ true, false, true, true, heap_align };
|
||||||
rsx::texture_memory_info opt{};
|
rsx::texture_memory_info opt{};
|
||||||
bool check_caps = true;
|
bool check_caps = true;
|
||||||
|
|
||||||
|
@ -820,6 +821,9 @@ namespace vk
|
||||||
std::vector<VkBufferCopy> buffer_copies;
|
std::vector<VkBufferCopy> buffer_copies;
|
||||||
copy_regions.reserve(subresource_layout.size());
|
copy_regions.reserve(subresource_layout.size());
|
||||||
|
|
||||||
|
VkBuffer read_buffer = upload_heap.heap->value;
|
||||||
|
VkDeviceSize offset_in_read_buffer = 0;
|
||||||
|
|
||||||
if (vk::is_renderpass_open(cmd))
|
if (vk::is_renderpass_open(cmd))
|
||||||
{
|
{
|
||||||
vk::end_renderpass(cmd);
|
vk::end_renderpass(cmd);
|
||||||
|
@ -877,6 +881,33 @@ namespace vk
|
||||||
copy_info.imageSubresource.mipLevel = layout.level;
|
copy_info.imageSubresource.mipLevel = layout.level;
|
||||||
copy_info.bufferRowLength = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
|
copy_info.bufferRowLength = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
|
||||||
|
|
||||||
|
if (opt.require_upload)
|
||||||
|
{
|
||||||
|
ensure(!opt.deferred_cmds.empty());
|
||||||
|
|
||||||
|
auto base_addr = static_cast<const char*>(opt.deferred_cmds.front().src);
|
||||||
|
auto end_addr = static_cast<const char*>(opt.deferred_cmds.back().src) + opt.deferred_cmds.back().length;
|
||||||
|
auto data_length = end_addr - base_addr;
|
||||||
|
u64 src_address = 0;
|
||||||
|
|
||||||
|
if (uptr(base_addr) > uptr(vm::g_sudo_addr))
|
||||||
|
{
|
||||||
|
src_address = uptr(base_addr) - uptr(vm::g_sudo_addr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
src_address = uptr(base_addr) - uptr(vm::g_base_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto dma_mapping = vk::map_dma(cmd, static_cast<u32>(src_address), static_cast<u32>(data_length));
|
||||||
|
vk::load_dma(src_address, data_length);
|
||||||
|
|
||||||
|
read_buffer = dma_mapping.second->value;
|
||||||
|
offset_in_read_buffer = dma_mapping.first;
|
||||||
|
|
||||||
|
copy_info.bufferOffset = offset_in_read_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
if (opt.require_swap || opt.require_deswizzle || requires_depth_processing)
|
if (opt.require_swap || opt.require_deswizzle || requires_depth_processing)
|
||||||
{
|
{
|
||||||
if (!scratch_buf)
|
if (!scratch_buf)
|
||||||
|
@ -892,11 +923,25 @@ namespace vk
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy from upload heap to scratch mem
|
// Copy from upload heap to scratch mem
|
||||||
buffer_copies.push_back({});
|
if (!opt.deferred_cmds.empty())
|
||||||
auto& copy = buffer_copies.back();
|
{
|
||||||
copy.srcOffset = offset_in_buffer;
|
for (const auto& copy_cmd : opt.deferred_cmds)
|
||||||
copy.dstOffset = scratch_offset;
|
{
|
||||||
copy.size = image_linear_size;
|
buffer_copies.push_back({});
|
||||||
|
auto& copy = buffer_copies.back();
|
||||||
|
copy.srcOffset = uptr(copy_cmd.dst) + offset_in_read_buffer;
|
||||||
|
copy.dstOffset = scratch_offset;
|
||||||
|
copy.size = copy_cmd.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buffer_copies.push_back({});
|
||||||
|
auto& copy = buffer_copies.back();
|
||||||
|
copy.srcOffset = offset_in_buffer;
|
||||||
|
copy.dstOffset = scratch_offset;
|
||||||
|
copy.size = image_linear_size;
|
||||||
|
}
|
||||||
|
|
||||||
// Point data source to scratch mem
|
// Point data source to scratch mem
|
||||||
copy_info.bufferOffset = scratch_offset;
|
copy_info.bufferOffset = scratch_offset;
|
||||||
|
@ -904,12 +949,17 @@ namespace vk
|
||||||
scratch_offset += image_linear_size;
|
scratch_offset += image_linear_size;
|
||||||
ensure((scratch_offset + image_linear_size) <= scratch_buf->size()); // "Out of scratch memory"
|
ensure((scratch_offset + image_linear_size) <= scratch_buf->size()); // "Out of scratch memory"
|
||||||
}
|
}
|
||||||
|
else if (opt.require_upload)
|
||||||
|
{
|
||||||
|
copy_info.bufferRowLength = std::max<u32>(block_in_pixel * layout.pitch_in_block, layout.width_in_texel);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt.require_swap || opt.require_deswizzle || requires_depth_processing)
|
if (opt.require_swap || opt.require_deswizzle || requires_depth_processing)
|
||||||
{
|
{
|
||||||
ensure(scratch_buf);
|
ensure(scratch_buf);
|
||||||
vkCmdCopyBuffer(cmd, upload_heap.heap->value, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
|
|
||||||
|
vkCmdCopyBuffer(cmd, read_buffer, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
|
||||||
|
|
||||||
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
|
||||||
|
@ -957,7 +1007,7 @@ namespace vk
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
vkCmdCopyBufferToImage(cmd, upload_heap.heap->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
|
vkCmdCopyBufferToImage(cmd, read_buffer, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue