diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 2677a76b30..f6555166af 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -1528,14 +1528,14 @@ namespace rsx
 			{
 			case CELL_GCM_TEXTURE_X16:
 			{
-				// NOP, a simple way to quickly read DEPTH16 data without shadow comparison
+				// A simple way to quickly read DEPTH16 data without shadow comparison
 				break;
 			}
 			case CELL_GCM_TEXTURE_A8R8G8B8:
 			case CELL_GCM_TEXTURE_D8R8G8B8:
-			case CELL_GCM_TEXTURE_A4R4G4B4: //TODO
-			case CELL_GCM_TEXTURE_R5G6B5: //TODO
 			{
+				// Reading depth data as XRGB8 is supported with in-shader conversion
+				// TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that)
				u32 remap = tex.remap();
 				result.redirected_textures |= (1 << i);
 				result.texture_scale[i][2] = (f32&)remap;
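Note on the redirected-texture path above: `(f32&)remap` is a raw bit reinterpretation that smuggles the u32 remap word through a float slot in `texture_scale`; the shader side would then recover the bits with GLSL's floatBitsToUint. A minimal sketch of the idiom, with memcpy as the strictly well-defined spelling and the function name being illustrative only:

    #include <cstdint>
    #include <cstring>

    // Pack raw u32 bits into a float slot without any numeric conversion.
    static float pack_bits_as_f32(std::uint32_t bits)
    {
        float f;
        std::memcpy(&f, &bits, sizeof(f)); // same 32 bits, reinterpreted
        return f;
    }
    // GLSL consumer (assumed): uint remap = floatBitsToUint(texture_scale[i].z);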
diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h
index 33ca35fcfe..c2da2a52bd 100644
--- a/rpcs3/Emu/RSX/VK/VKCompute.h
+++ b/rpcs3/Emu/RSX/VK/VKCompute.h
@@ -10,6 +10,7 @@ namespace vk
 		std::string m_src;
 		vk::glsl::shader m_shader;
 		std::unique_ptr<vk::glsl::program> m_program;
+		std::unique_ptr<vk::buffer> m_param_buffer;
 		vk::descriptor_pool m_descriptor_pool;
 		VkDescriptorSet m_descriptor_set = nullptr;
@@ -19,20 +20,22 @@ namespace vk
 		bool initialized = false;
 		bool unroll_loops = true;
+		bool uniform_inputs = false;
 		u32 optimal_group_size = 1;
 		u32 optimal_kernel_size = 1;

 		void init_descriptors()
 		{
-			VkDescriptorPoolSize descriptor_pool_sizes[1] =
+			VkDescriptorPoolSize descriptor_pool_sizes[2] =
 			{
 				{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_MAX_COMPUTE_TASKS },
+				{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_MAX_COMPUTE_TASKS }
 			};

 			//Reserve descriptor pools
 			m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes, 1);

-			std::vector<VkDescriptorSetLayoutBinding> bindings(1);
+			std::vector<VkDescriptorSetLayoutBinding> bindings(2);

 			bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
 			bindings[0].descriptorCount = 1;
@@ -40,10 +43,16 @@ namespace vk
 			bindings[0].binding = 0;
 			bindings[0].pImmutableSamplers = nullptr;

+			bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+			bindings[1].descriptorCount = 1;
+			bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+			bindings[1].binding = 1;
+			bindings[1].pImmutableSamplers = nullptr;
+
 			VkDescriptorSetLayoutCreateInfo infos = {};
 			infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
 			infos.pBindings = bindings.data();
-			infos.bindingCount = (u32)bindings.size();
+			infos.bindingCount = uniform_inputs ? 2u : 1u;

 			CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout));
@@ -88,6 +97,7 @@ namespace vk
 		{
 			m_shader.destroy();
 			m_program.reset();
+			m_param_buffer.reset();

 			vkDestroyDescriptorSetLayout(*get_current_renderer(), m_descriptor_layout, nullptr);
 			vkDestroyPipelineLayout(*get_current_renderer(), m_pipeline_layout, nullptr);
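Both bindings are always described, but bindingCount is gated on uniform_inputs, so tasks that never set the flag keep the original single-SSBO layout. The flag has to be set before init_descriptors() builds the set layout, which is why the tasks below flip it in their constructors. A hypothetical opt-in task, assuming cs_shuffle_base's defaults (struct name and parameter are not part of the patch):

    struct cs_uniform_example : vk::cs_shuffle_base // hypothetical
    {
        cs_uniform_example()
        {
            uniform_inputs = true; // read later when init_descriptors() runs
            variables = "	uint my_param = params[0].x;\n"; // illustrative input
            cs_shuffle_base::build("");
        }
    };

The `uvec4 params[16]` block declared for binding 1 fills exactly the 256 bytes bound there; under std140 a scalar uint array would get a 16-byte stride per element anyway, so packing into uvec4 wastes nothing.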
"layout(std140, set=0, binding=1) uniform ubo{ uvec4 params[16]; };\n" : "" }, }; m_src = fmt::replace_all(m_src, syntax_replace); @@ -262,9 +283,29 @@ namespace vk void bind_resources() override { m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + + if (uniform_inputs) + { + verify(HERE), m_param_buffer, m_param_buffer->value != VK_NULL_HANDLE; + m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set); + } } - void run(VkCommandBuffer cmd, vk::buffer* data, u32 data_length, u32 data_offset = 0) + void set_parameters(VkCommandBuffer cmd, const u32* params, u8 count) + { + verify(HERE), uniform_inputs; + + if (!m_param_buffer) + { + auto pdev = vk::get_current_renderer(); + m_param_buffer = std::make_unique(*pdev, 256, pdev->get_memory_mapping().host_visible_coherent, + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0); + } + + vkCmdUpdateBuffer(cmd, m_param_buffer->value, 0, count * sizeof(u32), params); + } + + void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_length, u32 data_offset = 0) { m_data = data; m_data_offset = data_offset; @@ -274,7 +315,7 @@ namespace vk const auto num_bytes_to_process = align(data_length, num_bytes_per_invocation); const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation; - if (num_bytes_to_process > data->size()) + if ((num_bytes_to_process + data_offset) > data->size()) { // Technically robust buffer access should keep the driver from crashing in OOB situations LOG_ERROR(RSX, "Inadequate buffer length submitted for a compute operation." @@ -339,6 +380,134 @@ namespace vk } }; + // NOTE: D24S8 layout has the stencil in the MSB! 
@@ -339,6 +380,134 @@ namespace vk
 		}
 	};

+	// NOTE: D24S8 layout has the stencil in the MSB! It's actually S8|D24|S8|D24 starting at offset 0
+	struct cs_interleave_task : cs_shuffle_base
+	{
+		u32 m_ssbo_length = 0;
+
+		cs_interleave_task()
+		{
+			uniform_inputs = true;
+
+			variables =
+			{
+				"	uint block_length = params[0].x >> 2;\n"
+				"	uint z_offset = params[0].y >> 2;\n"
+				"	uint s_offset = params[0].z >> 2;\n"
+				"	uint depth;\n"
+				"	uint stencil;\n"
+				"	uint stencil_shift;\n"
+				"	uint stencil_offset;\n"
+			};
+		}
+
+		void bind_resources() override
+		{
+			m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
+
+			if (uniform_inputs)
+			{
+				verify(HERE), m_param_buffer;
+				m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
+			}
+		}
+
+		void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset)
+		{
+			u32 parameters[3] = { data_length, zeta_offset - data_offset, stencil_offset - data_offset };
+			set_parameters(cmd, parameters, 3);
+
+			m_ssbo_length = stencil_offset + (data_length / 4) - data_offset;
+			cs_shuffle_base::run(cmd, data, data_length, data_offset);
+		}
+	};
+
+	struct cs_gather_d24x8 : cs_interleave_task
+	{
+		cs_gather_d24x8()
+		{
+			work_kernel =
+			{
+				"		if (index >= block_length)\n"
+				"			return;\n"
+				"\n"
+				"		depth = data[index + z_offset] & 0x00FFFFFF;\n"
+				"		stencil_offset = (index / 4);\n"
+				"		stencil_shift = (index % 4) * 8;\n"
+				"		stencil = data[stencil_offset + s_offset];\n"
+				"		stencil = (stencil >> stencil_shift) & 0xFF;\n"
+				"		value = (depth << 8) | stencil;\n"
+				"		data[index] = value;\n"
+			};
+
+			cs_shuffle_base::build("");
+		}
+	};
+
+	struct cs_gather_d32x8 : cs_interleave_task
+	{
+		cs_gather_d32x8()
+		{
+			work_kernel =
+			{
+				"		if (index >= block_length)\n"
+				"			return;\n"
+				"\n"
+				"		depth = f32_to_d24(data[index + z_offset]);\n"
+				"		stencil_offset = (index / 4);\n"
+				"		stencil_shift = (index % 4) * 8;\n"
+				"		stencil = data[stencil_offset + s_offset];\n"
+				"		stencil = (stencil >> stencil_shift) & 0xFF;\n"
+				"		value = (depth << 8) | stencil;\n"
+				"		data[index] = value;\n"
+			};
+
+			cs_shuffle_base::build("");
+		}
+	};
+
+	struct cs_scatter_d24x8 : cs_interleave_task
+	{
+		cs_scatter_d24x8()
+		{
+			work_kernel =
+			{
+				"		if (index >= block_length)\n"
+				"			return;\n"
+				"\n"
+				"		value = data[index];\n"
+				"		data[index + z_offset] = (value >> 8);\n"
+				"		stencil_offset = (index / 4);\n"
+				"		stencil_shift = (index % 4) * 8;\n"
+				"		stencil = (value & 0xFF) << stencil_shift;\n"
+				"		data[stencil_offset + s_offset] |= stencil;\n"
+			};
+
+			cs_shuffle_base::build("");
+		}
+	};
+
+	struct cs_scatter_d32x8 : cs_interleave_task
+	{
+		cs_scatter_d32x8()
+		{
+			work_kernel =
+			{
+				"		if (index >= block_length)\n"
+				"			return;\n"
+				"\n"
+				"		value = data[index];\n"
+				"		data[index + z_offset] = d24_to_f32(value >> 8);\n"
+				"		stencil_offset = (index / 4);\n"
+				"		stencil_shift = (index % 4) * 8;\n"
+				"		stencil = (value & 0xFF) << stencil_shift;\n"
+				"		data[stencil_offset + s_offset] |= stencil;\n"
+			};
+
+			cs_shuffle_base::build("");
+		}
+	};
+
 	// TODO: Replace with a proper manager
 	extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;
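Each gather invocation assembles one interleaved output word; a CPU transcription of cs_gather_d24x8's work_kernel makes the addressing easy to read (here `data` mirrors the SSBO in u32 words, and the offsets are the byte parameters already divided by 4, matching the `>> 2` in `variables`):

    #include <cstdint>

    // CPU transcription of the D24S8 gather kernel, for reference only.
    static void gather_d24x8(std::uint32_t* data, std::uint32_t block_length,
                             std::uint32_t z_offset, std::uint32_t s_offset)
    {
        for (std::uint32_t index = 0; index < block_length; ++index)
        {
            const std::uint32_t depth   = data[index + z_offset] & 0x00FFFFFF;
            const std::uint32_t s_word  = data[s_offset + (index / 4)];
            const std::uint32_t stencil = (s_word >> ((index % 4) * 8)) & 0xFF;
            data[index] = (depth << 8) | stencil; // stencil lands in the low byte
        }
    }

The scatter kernels invert this mapping, which is why four invocations share each stencil word and must merge their bytes with |= rather than plain stores.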
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index a78e596100..dfcbcbe276 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -146,6 +146,9 @@ namespace vk
 	void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
 	void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);

+	void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region);
+	void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region);
+
 	void copy_image_typeless(const command_buffer &cmd, const image *src, const image *dst, const areai& src_rect, const areai& dst_rect,
 		u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
 		VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF);
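The two new declarations mirror the shape of the raw Vulkan entry points they wrap, so call sites can switch mechanically; for formats without a packed depth-stencil layout they fall through to the plain one-region copy. Illustrative before/after at a call site:

    // before: vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, buf->value, 1, &region);
    // after:
    vk::copy_image_to_buffer(cmd, src, buf, region);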
diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp
index ebb83fc34a..2864e09692 100644
--- a/rpcs3/Emu/RSX/VK/VKTexture.cpp
+++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp
@@ -106,6 +106,127 @@ namespace vk
 		fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format);
 	}

+	void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region)
+	{
+		switch (src->format())
+		{
+		default:
+		{
+			vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 1, &region);
+			break;
+		}
+		case VK_FORMAT_D24_UNORM_S8_UINT:
+		case VK_FORMAT_D32_SFLOAT_S8_UINT:
+		{
+			verify(HERE), region.imageSubresource.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
+
+			const u32 out_w = region.bufferRowLength ? region.bufferRowLength : region.imageExtent.width;
+			const u32 out_h = region.bufferImageHeight ? region.bufferImageHeight : region.imageExtent.height;
+			const u32 packed_length = out_w * out_h * 4;
+			const u32 in_depth_size = packed_length;
+			const u32 in_stencil_size = out_w * out_h;
+
+			const u32 allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
+			verify(HERE), dst->size() >= allocation_end;
+
+			const VkDeviceSize z_offset = align(region.bufferOffset + packed_length, 256);
+			const VkDeviceSize s_offset = align(z_offset + in_depth_size, 256);
+
+			// 1. Copy the depth and stencil blocks to separate banks
+			VkBufferImageCopy sub_regions[2];
+			sub_regions[0] = sub_regions[1] = region;
+			sub_regions[0].bufferOffset = z_offset;
+			sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+			sub_regions[1].bufferOffset = s_offset;
+			sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+			vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 2, sub_regions);
+
+			// 2. Interleave the separated data blocks with a compute job
+			vk::cs_interleave_task *job;
+			if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
+			{
+				job = vk::get_compute_task<vk::cs_gather_d24x8>();
+			}
+			else
+			{
+				job = vk::get_compute_task<vk::cs_gather_d32x8>();
+			}
+
+			vk::insert_buffer_memory_barrier(cmd, dst->value, z_offset, in_depth_size + in_stencil_size,
+				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
+
+			job->run(cmd, dst, (u32)region.bufferOffset, packed_length, z_offset, s_offset);
+
+			vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed_length,
+				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+				VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			break;
+		}
+		}
+	}
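Worked numbers for the scratch layout computed above, assuming a 64x64 D24S8 source with bufferOffset = 0 and no row padding (bufferRowLength == imageExtent.width):

    #include <cstdint>
    constexpr std::uint32_t out_w = 64, out_h = 64;
    constexpr std::uint32_t packed_length   = out_w * out_h * 4; // 16384 B of interleaved D24S8
    constexpr std::uint32_t in_depth_size   = packed_length;     // 16384 B, one u32 per texel
    constexpr std::uint32_t in_stencil_size = out_w * out_h;     //  4096 B, one byte per texel
    // z_offset = align(0 + 16384, 256) = 16384
    // s_offset = align(16384 + 16384, 256) = 32768
    // scratch layout: [interleaved D24S8][depth plane][stencil plane]

The 256-byte alignment of the plane offsets is presumably chosen to satisfy common bufferOffset and storage-buffer offset alignment limits across drivers.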
+	void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region)
+	{
+		switch (dst->format())
+		{
+		default:
+		{
+			vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 1, &region);
+			break;
+		}
+		case VK_FORMAT_D24_UNORM_S8_UINT:
+		case VK_FORMAT_D32_SFLOAT_S8_UINT:
+		{
+			const u32 out_w = region.bufferRowLength ? region.bufferRowLength : region.imageExtent.width;
+			const u32 out_h = region.bufferImageHeight ? region.bufferImageHeight : region.imageExtent.height;
+			const u32 packed_length = out_w * out_h * 4;
+			const u32 in_depth_size = packed_length;
+			const u32 in_stencil_size = out_w * out_h;
+
+			const u32 allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
+			verify(HERE), src->size() >= allocation_end;
+
+			const VkDeviceSize z_offset = align(region.bufferOffset + packed_length, 256);
+			const VkDeviceSize s_offset = align(z_offset + in_depth_size, 256);
+
+			// Zero out the stencil block
+			vkCmdFillBuffer(cmd, src->value, s_offset, in_stencil_size, 0);
+
+			vk::insert_buffer_memory_barrier(cmd, src->value, s_offset, in_stencil_size,
+				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
+
+			// 1. Scatter the interleaved data into separate depth and stencil blocks
+			vk::cs_interleave_task *job;
+			if (dst->format() == VK_FORMAT_D24_UNORM_S8_UINT)
+			{
+				job = vk::get_compute_task<vk::cs_scatter_d24x8>();
+			}
+			else
+			{
+				job = vk::get_compute_task<vk::cs_scatter_d32x8>();
+			}
+
+			job->run(cmd, src, (u32)region.bufferOffset, packed_length, z_offset, s_offset);
+
+			vk::insert_buffer_memory_barrier(cmd, src->value, z_offset, in_depth_size + in_stencil_size,
+				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+				VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+
+			// 2. Copy the separated blocks into the target
+			VkBufferImageCopy sub_regions[2];
+			sub_regions[0] = sub_regions[1] = region;
+			sub_regions[0].bufferOffset = z_offset;
+			sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+			sub_regions[1].bufferOffset = s_offset;
+			sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+			vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 2, sub_regions);
+			break;
+		}
+		}
+	}
+
 	void copy_image_typeless(const vk::command_buffer& cmd, const vk::image* src, const vk::image* dst, const areai& src_rect, const areai& dst_rect,
 		u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask)
 	{
@@ -138,7 +259,7 @@ namespace vk
 		for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
 		{
-			vkCmdCopyImageToBuffer(cmd, src->value, preferred_src_format, scratch_buf->value, 1, &src_copy);
+			vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy);

 			const auto src_convert = get_format_convert_flags(src->info.format);
 			const auto dst_convert = get_format_convert_flags(dst->info.format);
@@ -187,7 +308,7 @@ namespace vk
 			}
 		}

-		vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst->value, preferred_dst_format, 1, &dst_copy);
+		vk::copy_buffer_to_image(cmd, scratch_buf, dst, dst_copy);

 		src_copy.imageSubresource.mipLevel++;
 		dst_copy.imageSubresource.mipLevel++;
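In copy_buffer_to_image the stencil plane is cleared with vkCmdFillBuffer before the scatter job runs because the scatter kernels accumulate rather than overwrite: four texels share each u32 word of the plane, so each invocation merges its byte with a bitwise OR, and any stale bits in the word would survive. The fill-to-compute barrier orders the clear against those read-modify-writes:

    // from the scatter work_kernel: merge, not overwrite
    data[stencil_offset + s_offset] |= stencil;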
@@ -438,9 +559,6 @@ namespace vk
 		u32 block_in_pixel = get_format_block_size_in_texel(format);
 		u8 block_size_in_bytes = get_format_block_size_in_bytes(format);

-		//TODO: Depth and stencil transfer together
-		flags &= ~(VK_IMAGE_ASPECT_STENCIL_BIT);
-
 		for (const rsx_subresource_layout &layout : subresource_layout)
 		{
 			u32 row_pitch = align(layout.width_in_block * block_size_in_bytes, 256);
@@ -449,29 +567,26 @@ namespace vk
 			//Map with extra padding bytes in case of realignment
 			size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8);
 			void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
-			void *dst = mapped_buffer;
 			VkBuffer buffer_handle = upload_heap.heap->value;

-			if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT)
-			{
-				//Misalign intentionally to skip the first stencil byte in D24S8 data
-				//Ensures the real depth data is dword aligned
-
-				//Skip leading dword when writing to texture
-				offset_in_buffer += 4;
-				dst = (char*)(mapped_buffer) + 4 - 1;
-			}
-
-			gsl::span<gsl::byte> mapped{ (gsl::byte*)dst, ::narrow<int>(image_linear_size) };
+			gsl::span<gsl::byte> mapped{ (gsl::byte*)mapped_buffer, ::narrow<int>(image_linear_size) };
 			upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256);
 			upload_heap.unmap();

-			if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
-			{
-				// Run GPU compute task to convert the D24x8 to FP32
-				// NOTE: On commandbuffer submission, the HOST_WRITE to ALL_COMMANDS barrier is implicitly inserted according to spec
-				// No need to add another explicit barrier unless a driver bug is found
+			VkBufferImageCopy copy_info = {};
+			copy_info.bufferOffset = offset_in_buffer;
+			copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
+			copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
+			copy_info.imageExtent.depth = layout.depth;
+			copy_info.imageSubresource.aspectMask = flags;
+			copy_info.imageSubresource.layerCount = 1;
+			copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
+			copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
+			copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;

+			if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+				dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+			{
 				// Executing GPU tasks on host_visible RAM is awful, copy to device-local buffer instead
 				auto scratch_buf = vk::get_scratch_buffer();

@@ -485,27 +600,14 @@ namespace vk
 				insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
 					VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

-				vk::get_compute_task<vk::cs_shuffle_d24x8_f32>()->run(cmd, upload_heap.heap.get(), image_linear_size, (u32)offset_in_buffer);
-
-				insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-					VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
-
-				buffer_handle = scratch_buf->value;
-				offset_in_buffer = 0;
+				copy_info.bufferOffset = 0;
+				vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, copy_info);
+			}
+			else
+			{
+				vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
 			}

-			VkBufferImageCopy copy_info = {};
-			copy_info.bufferOffset = offset_in_buffer;
-			copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
-			copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
-			copy_info.imageExtent.depth = layout.depth;
-			copy_info.imageSubresource.aspectMask = flags;
-			copy_info.imageSubresource.layerCount = 1;
-			copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
-			copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
-			copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
-
-			vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
 			mipmap_level++;
 		}
 	}
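As a worked example of the row-pitch arithmetic above, consider an uncompressed RGBA8 mip 100 texels wide, so block_in_pixel = 1 and block_size_in_bytes = 4:

    // row_pitch       = align(100 * 4, 256) = 512 bytes
    // bufferRowLength = 1 * 512 / 4         = 128 texels
    // Each row in the upload heap is padded to a 256-byte-aligned stride, and
    // bufferRowLength expresses that padded stride back in texels for Vulkan.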