mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-04 05:51:27 +12:00
vk: Fix slow bo upload + copy when using RADV
- This reverts commit dbcf1b5a03bfef18f404aa6827515af319abd744.
- Avoid unmapping buffers used for GPU upload/download. Unmapping is very slow on some platforms.
This commit is contained in:
parent
56f7359da4
commit
f9d9d12f11
3 changed files with 14 additions and 16 deletions
|
@ -34,10 +34,16 @@ namespace vk
|
||||||
return inheritance_info.parent->map_range(range);
|
return inheritance_info.parent->map_range(range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (memory_mapping == nullptr)
|
||||||
|
{
|
||||||
|
memory_mapping = static_cast<u8*>(allocated_memory->map(0, VK_WHOLE_SIZE));
|
||||||
|
ensure(memory_mapping);
|
||||||
|
}
|
||||||
|
|
||||||
ensure(range.start >= base_address);
|
ensure(range.start >= base_address);
|
||||||
u32 start = range.start;
|
u32 start = range.start;
|
||||||
start -= base_address;
|
start -= base_address;
|
||||||
return allocated_memory->map(start, range.length());
|
return memory_mapping + start;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dma_block::unmap()
|
void dma_block::unmap()
|
||||||
|
@ -49,6 +55,7 @@ namespace vk
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
allocated_memory->unmap();
|
allocated_memory->unmap();
|
||||||
|
memory_mapping = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,6 +80,7 @@ namespace vk
|
||||||
|
|
||||||
auto gc = vk::get_resource_manager();
|
auto gc = vk::get_resource_manager();
|
||||||
gc->dispose(allocated_memory);
|
gc->dispose(allocated_memory);
|
||||||
|
memory_mapping = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,8 +115,7 @@ namespace vk
|
||||||
auto dst = vm::get_super_ptr(range.start);
|
auto dst = vm::get_super_ptr(range.start);
|
||||||
std::memcpy(dst, src, range.length());
|
std::memcpy(dst, src, range.length());
|
||||||
|
|
||||||
// TODO: Clear page bits
|
// NOTE: Do not unmap. This can be extremely slow on some platforms.
|
||||||
unmap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void dma_block::load(const utils::address_range& range)
|
void dma_block::load(const utils::address_range& range)
|
||||||
|
@ -124,8 +131,7 @@ namespace vk
|
||||||
auto dst = map_range(range);
|
auto dst = map_range(range);
|
||||||
std::memcpy(dst, src, range.length());
|
std::memcpy(dst, src, range.length());
|
||||||
|
|
||||||
// TODO: Clear page bits to sychronized
|
// NOTE: Do not unmap. This can be extremely slow on some platforms.
|
||||||
unmap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<u32, buffer*> dma_block::get(const utils::address_range& range)
|
std::pair<u32, buffer*> dma_block::get(const utils::address_range& range)
|
||||||
|
|
|
@ -22,6 +22,7 @@ namespace vk
|
||||||
inheritance_info;
|
inheritance_info;
|
||||||
|
|
||||||
u32 base_address = 0;
|
u32 base_address = 0;
|
||||||
|
u8* memory_mapping = nullptr;
|
||||||
std::unique_ptr<buffer> allocated_memory;
|
std::unique_ptr<buffer> allocated_memory;
|
||||||
|
|
||||||
virtual void allocate(const render_device& dev, usz size);
|
virtual void allocate(const render_device& dev, usz size);
|
||||||
|
|
|
@ -880,7 +880,7 @@ namespace vk
|
||||||
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
|
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
|
||||||
{
|
{
|
||||||
const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
|
const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
|
||||||
rsx::texture_uploader_capabilities caps{ .supports_zero_copy = true, .alignment = heap_align };
|
rsx::texture_uploader_capabilities caps{ .alignment = heap_align };
|
||||||
rsx::texture_memory_info opt{};
|
rsx::texture_memory_info opt{};
|
||||||
bool check_caps = true;
|
bool check_caps = true;
|
||||||
|
|
||||||
|
@ -896,15 +896,6 @@ namespace vk
|
||||||
std::vector<std::pair<VkBuffer, u32>> upload_commands;
|
std::vector<std::pair<VkBuffer, u32>> upload_commands;
|
||||||
copy_regions.reserve(subresource_layout.size());
|
copy_regions.reserve(subresource_layout.size());
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
// RADV workaround. Buffer-to-buffer transfers are extremely slow and invoke memmove in vkCmdCopyBuffer.
|
|
||||||
if (const auto vendor = vk::get_driver_vendor();
|
|
||||||
vendor == driver_vendor::RADV)
|
|
||||||
{
|
|
||||||
caps.supports_zero_copy = false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (const rsx::subresource_layout &layout : subresource_layout)
|
for (const rsx::subresource_layout &layout : subresource_layout)
|
||||||
{
|
{
|
||||||
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout);
|
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout);
|
||||||
|
@ -922,7 +913,7 @@ namespace vk
|
||||||
{
|
{
|
||||||
caps.supports_byteswap = (image_linear_size >= 1024);
|
caps.supports_byteswap = (image_linear_size >= 1024);
|
||||||
caps.supports_hw_deswizzle = caps.supports_byteswap;
|
caps.supports_hw_deswizzle = caps.supports_byteswap;
|
||||||
caps.supports_zero_copy = caps.supports_zero_copy && caps.supports_byteswap;
|
caps.supports_zero_copy = caps.supports_byteswap;
|
||||||
caps.supports_vtc_decoding = false;
|
caps.supports_vtc_decoding = false;
|
||||||
check_caps = false;
|
check_caps = false;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue