vulkan: Use a parallel cb for texture cache ops; bug fixes
vk: More surface fixes and debug stuff
vk: Crude thread sync implementation to prevent cb desync crashes due to resource usage
fix build
more fixes
vulkan: Do not flush command queue if address cannot be flushed
vk: More fixes for accuracy. Needs optimizations
vk: Batch all flush-to-buffer operations in the non-critical path
- More work is needed to make queue submission asynchronous
parent fd754a4ddc
commit 2b19031206
5 changed files with 169 additions and 76 deletions
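The "crude thread sync implementation" in the message works as a rendezvous between a faulting thread and the RSX thread: a non-RSX thread that takes an access violation raises m_flush_commands and spins until the RSX thread has submitted and reset its primary command buffer in do_local_task(), then performs the flush through the secondary command buffer under m_secondary_cb_guard, while the RSX thread in turn spins on m_queued_threads. A condensed, standalone sketch of that protocol (names follow the diff below; the Vulkan work is stubbed out, and the bare spin loops from the commit are softened with yield() here):

    #include <atomic>
    #include <mutex>
    #include <thread>

    std::atomic<bool> m_flush_commands{ false };
    std::atomic<int>  m_queued_threads{ 0 };
    std::mutex        m_secondary_cb_guard;

    //Stand-ins for the real work: close_and_submit_command_buffer + fence wait + pool reset,
    //and m_texture_cache.flush_address on the secondary command buffer
    void submit_and_reset_primary_cb() {}
    bool flush_with_secondary_cb() { return true; }

    //Faulting (non-RSX) thread, as in on_access_violation()
    bool handle_fault_from_other_thread()
    {
        m_flush_commands = true;
        m_queued_threads++;

        while (m_flush_commands)       //wait for the RSX thread to drain its command buffer
            std::this_thread::yield();

        bool status;
        {
            std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
            status = flush_with_secondary_cb();
        }

        m_queued_threads--;
        return status;
    }

    //RSX thread, as in do_local_task()
    void rsx_local_task()
    {
        if (m_flush_commands)
        {
            submit_and_reset_primary_cb();
            m_flush_commands = false;

            while (m_queued_threads)   //hold the RSX thread until every queued flush completes
                std::this_thread::yield();
        }
    }

As the TODO in on_access_violation notes, this handshake can deadlock if the RSX thread is already blocked inside a queue flush when the flag is raised.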
@@ -493,6 +493,11 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
     //create command buffer...
     m_command_buffer_pool.create((*m_device));
     m_command_buffer.create(m_command_buffer_pool);
+
+    //Create secondar command_buffer for parallel operations
+    m_secondary_command_buffer_pool.create((*m_device));
+    m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
+
     open_command_buffer();
 
     for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
@@ -620,6 +625,9 @@ VKGSRender::~VKGSRender()
     m_command_buffer.destroy();
     m_command_buffer_pool.destroy();
 
+    m_secondary_command_buffer.destroy();
+    m_secondary_command_buffer_pool.destroy();
+
     //Device handles/contexts
     m_swap_chain->destroy();
     m_thread_context.close();
@@ -632,7 +640,29 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
     if (is_writing)
         return m_texture_cache.invalidate_address(address);
     else
-        return m_texture_cache.flush_address(address, *m_device, m_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+    {
+        if (!m_texture_cache.address_is_flushable(address))
+            return false;
+
+        if (std::this_thread::get_id() != rsx_thread)
+        {
+            //TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
+            m_flush_commands = true;
+            m_queued_threads++;
+
+            //This is awful!
+            while (m_flush_commands);
+
+            std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+            bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+
+            m_queued_threads--;
+            return status;
+        }
+
+        std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+        return m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+    }
 
     return false;
 }
@@ -646,7 +676,9 @@ void VKGSRender::begin()
 {
     std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
 
-    close_and_submit_command_buffer({}, m_submit_fence);
+    //??Should we wait for the queue to actually render to the GPU? or just flush the queue?
+    //Needs investigation to determine what drivers expect here, bottom_of_pipe is guaranteed to work, but will be too slow
+    close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
     CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
 
     vkResetDescriptorPool(*m_device, descriptor_pool, 0);
@@ -833,9 +865,9 @@ void VKGSRender::end()
     std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
     m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
 
-    rsx::thread::end();
-
     copy_render_targets_to_dma_location();
+
+    rsx::thread::end();
 }
 
 void VKGSRender::set_viewport()
@@ -875,6 +907,8 @@ void VKGSRender::on_init_thread()
     GSRender::on_init_thread();
     m_attrib_ring_info.init(8 * RING_BUFFER_SIZE);
     m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
+
+    rsx_thread = std::this_thread::get_id();
 }
 
 void VKGSRender::on_exit()
@@ -987,13 +1021,6 @@ void VKGSRender::clear_surface(u32 mask)
 
 void VKGSRender::sync_at_semaphore_release()
 {
-    close_and_submit_command_buffer({}, m_submit_fence);
-    CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
-
-    CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
-    CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
-    open_command_buffer();
-
     m_flush_draw_buffers = true;
 }
@@ -1002,6 +1029,13 @@ void VKGSRender::copy_render_targets_to_dma_location()
     if (!m_flush_draw_buffers)
         return;
 
+    if (!g_cfg_rsx_write_color_buffers && !g_cfg_rsx_write_depth_buffer)
+        return;
+
+    //TODO: Make this asynchronous. Should be similar to a glFlush() but in this case its similar to glFinish
+    //This is due to all the hard waits for fences
+    //TODO: Use a command buffer array to allow explicit draw command tracking
+
     if (g_cfg_rsx_write_color_buffers)
     {
         for (u8 index = 0; index < rsx::limits::color_buffers_count; index++)
@@ -1023,7 +1057,28 @@ void VKGSRender::copy_render_targets_to_dma_location()
         }
     }
 
-    m_flush_draw_buffers = false;
+    close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
+
+    CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
+    CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
+    open_command_buffer();
+}
+
+void VKGSRender::do_local_task()
+{
+    if (m_flush_commands)
+    {
+        close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
+
+        CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
+        CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
+        open_command_buffer();
+
+        m_flush_commands = false;
+        while (m_queued_threads);
+    }
 }
 
 bool VKGSRender::do_method(u32 cmd, u32 arg)
@@ -1294,17 +1349,16 @@ void VKGSRender::write_buffers()
 {
 }
 
-void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence)
+void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
 {
     CHECK_RESULT(vkEndCommandBuffer(m_command_buffer));
 
-    VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
     VkCommandBuffer cmd = m_command_buffer;
 
     VkSubmitInfo infos = {};
    infos.commandBufferCount = 1;
     infos.pCommandBuffers = &cmd;
-    infos.pWaitDstStageMask = &pipe_stage_flags;
+    infos.pWaitDstStageMask = &pipeline_stage_flags;
     infos.pWaitSemaphores = semaphores.data();
     infos.waitSemaphoreCount = static_cast<uint32_t>(semaphores.size());
     infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
@@ -11,6 +11,7 @@
 #include "VKProgramBuffer.h"
 #include "../GCM.h"
 #include "../rsx_utils.h"
+#include <atomic>
 
 #pragma comment(lib, "VKstatic.1.lib")
 
@@ -60,6 +61,9 @@ private:
     vk::command_pool m_command_buffer_pool;
     vk::command_buffer m_command_buffer;
 
+    std::mutex m_secondary_cb_guard;
+    vk::command_pool m_secondary_command_buffer_pool;
+    vk::command_buffer m_secondary_command_buffer;
+
     std::array<VkRenderPass, 120> m_render_passes;
     VkDescriptorSetLayout descriptor_layouts;
@@ -86,7 +90,13 @@ private:
 
     rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
     rsx::gcm_framebuffer_info m_depth_surface_info;
 
     bool m_flush_draw_buffers = false;
+
+    std::atomic<bool> m_flush_commands = false;
+    std::atomic<int> m_queued_threads = 0;
+
+    std::thread::id rsx_thread;
 
 public:
     VKGSRender();
@@ -94,7 +104,7 @@ public:
 
 private:
     void clear_surface(u32 mask);
-    void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence);
+    void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
     void open_command_buffer();
     void sync_at_semaphore_release();
     void prepare_rtts();
@@ -117,5 +127,7 @@ protected:
     bool do_method(u32 id, u32 arg) override;
     void flip(int buffer) override;
 
+    void do_local_task() override;
+
     bool on_access_violation(u32 address, bool is_writing) override;
 };
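m_flush_commands and m_queued_threads are declared std::atomic because both sides of the handshake busy-wait on them; a plain bool/int would let the compiler hoist the load out of the spin loop (and would be a data race besides). A minimal illustration of the difference, independent of the renderer:

    #include <atomic>
    #include <thread>

    std::atomic<bool> ready{ false };  //with a plain bool, the waiter below may never observe the store

    int main()
    {
        std::thread worker([] { ready = true; });

        while (!ready)                 //each iteration performs a fresh atomic load
            std::this_thread::yield();

        worker.join();
    }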
@@ -32,7 +32,7 @@ namespace rsx
 
 namespace vk
 {
-#define CHECK_RESULT(expr) do { VkResult _res = (expr); if (_res != VK_SUCCESS) fmt::throw_exception("Assertion failed! Result is %Xh", (s32)_res); } while (0)
+#define CHECK_RESULT(expr) { VkResult _res = (expr); if (_res != VK_SUCCESS) fmt::throw_exception("Assertion failed! Result is %Xh" HERE, (s32)_res); }
 
     VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);
     VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);
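A side effect worth noting: the updated CHECK_RESULT drops the do { ... } while (0) wrapper, so the expansion plus the trailing semicolon at a call site forms a compound statement followed by an empty statement, which breaks unbraced if/else chains around the macro. A statement-safe variant that still appends HERE (a sketch, not what this commit ships) would keep the wrapper:

    #define CHECK_RESULT(expr) do { VkResult _res = (expr); if (_res != VK_SUCCESS) fmt::throw_exception("Assertion failed! Result is %Xh" HERE, (s32)_res); } while (0)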
@@ -28,6 +28,14 @@ namespace vk
 
         cached_texture_section() {}
 
+        void reset(u32 base, u32 length)
+        {
+            if (length > cpu_address_range)
+                release_dma_resources();
+
+            rsx::buffered_section::reset(base, length);
+        }
+
         void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 native_pitch = 0, bool managed=true)
         {
             width = w;
@@ -38,8 +46,7 @@
             uploaded_image_view.reset(view);
             vram_texture = image;
 
-            if (managed)
-                managed_texture.reset(image);
+            if (managed) managed_texture.reset(image);
 
             //TODO: Properly compute these values
             this->native_pitch = native_pitch;
@@ -105,16 +112,18 @@
 
         bool is_flushable() const
         {
-            if (protection == utils::protection::ro || protection == utils::protection::no)
-                return true;
-
-            if (uploaded_image_view.get() == nullptr && vram_texture != nullptr)
-                return true;
-
-            return false;
+            //This section is active and can be flushed to cpu
+            return (protection == utils::protection::no);
         }
 
-        void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue, VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
+        bool is_flushed() const
+        {
+            //This memory section was flushable, but a flush has already removed protection
+            return (protection == utils::protection::rw && uploaded_image_view.get() == nullptr && managed_texture.get() == nullptr);
+        }
+
+        void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue,
+            bool manage_cb_lifetime = false, VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
         {
             if (m_device == nullptr)
             {
@@ -130,7 +139,21 @@
 
             if (dma_buffer.get() == nullptr)
             {
-                dma_buffer.reset(new vk::buffer(*m_device, native_pitch * height, heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
+                dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
             }
 
+            if (manage_cb_lifetime)
+            {
+                //cb has to be guaranteed to be in a closed state
+                //This function can be called asynchronously
+                VkCommandBufferInheritanceInfo inheritance_info = {};
+                inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
+
+                VkCommandBufferBeginInfo begin_infos = {};
+                begin_infos.pInheritanceInfo = &inheritance_info;
+                begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+                begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+                CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos));
+            }
+
             VkBufferImageCopy copyRegion = {};
@@ -147,52 +170,47 @@
             vkCmdCopyImageToBuffer(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &copyRegion);
             change_image_layout(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, layout, subresource_range);
 
-            CHECK_RESULT(vkEndCommandBuffer(cmd));
+            if (manage_cb_lifetime)
+            {
+                CHECK_RESULT(vkEndCommandBuffer(cmd));
 
-            VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+            VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
             VkCommandBuffer command_buffer = cmd;
 
             VkSubmitInfo infos = {};
             infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
             infos.commandBufferCount = 1;
             infos.pCommandBuffers = &command_buffer;
             infos.pWaitDstStageMask = &pipe_stage_flags;
             infos.pWaitSemaphores = nullptr;
             infos.waitSemaphoreCount = 0;
 
             CHECK_RESULT(vkQueueSubmit(submit_queue, 1, &infos, dma_fence));
 
             //Now we need to restart the command-buffer to restore it to the way it was before...
             CHECK_RESULT(vkWaitForFences(*m_device, 1, &dma_fence, VK_TRUE, UINT64_MAX));
             CHECK_RESULT(vkResetCommandPool(*m_device, cmd.get_command_pool(), 0));
             CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));
+            }
-
-            VkCommandBufferInheritanceInfo inheritance_info = {};
-            inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
-
-            VkCommandBufferBeginInfo begin_infos = {};
-            begin_infos.pInheritanceInfo = &inheritance_info;
-            begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-            begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-            CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos));
         }
 
         template<typename T>
         void do_memory_transfer(void *pixels_dst, void *pixels_src)
         {
+            //LOG_ERROR(RSX, "COPY %d -> %d", native_pitch, pitch);
             if (pitch == native_pitch)
             {
                 if (sizeof T == 1)
-                    memcpy(pixels_dst, pixels_src, native_pitch * height);
+                    memcpy(pixels_dst, pixels_src, cpu_address_range);
                 else
                 {
-                    const u32 block_size = native_pitch * height / sizeof T;
+                    const u32 block_size = width * height;
 
                     auto typed_dst = (be_t<T> *)pixels_dst;
                     auto typed_src = (T *)pixels_src;
 
-                    for (u8 n = 0; n < block_size; ++n)
-                        typed_dst[n] = typed_src[n];
+                    for (u32 px = 0; px < block_size; ++px)
+                        typed_dst[px] = typed_src[px];
                 }
             }
             else
@@ -203,7 +221,7 @@
                 u8 *typed_src = (u8 *)pixels_src;
 
                 //TODO: Scaling
-                for (int row = 0; row < height; ++row)
+                for (u16 row = 0; row < height; ++row)
                 {
                     memcpy(typed_dst, typed_src, native_pitch);
                     typed_dst += pitch;
@@ -218,9 +236,9 @@
                 auto typed_dst = (be_t<T> *)pixels_dst;
                 auto typed_src = (T *)pixels_src;
 
-                for (int row = 0; row < height; ++row)
+                for (u16 row = 0; row < height; ++row)
                 {
-                    for (int px = 0; px < width; ++px)
+                    for (u16 px = 0; px < width; ++px)
                     {
                         typed_dst[px] = typed_src[px];
                     }
@@ -240,15 +258,13 @@
             if (dma_fence == VK_NULL_HANDLE || dma_buffer.get() == nullptr)
             {
                 LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
-                copy_texture(cmd, heap_index, submit_queue, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+                copy_texture(cmd, heap_index, submit_queue, true, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+
+                verify (HERE), (dma_fence != VK_NULL_HANDLE && dma_buffer.get());
             }
 
             protect(utils::protection::rw);
 
             //TODO: Image scaling, etc
-            void* pixels_src = dma_buffer->map(0, VK_WHOLE_SIZE);
+            void* pixels_src = dma_buffer->map(0, cpu_address_range);
             void* pixels_dst = vm::base(cpu_address_base);
 
             //We have to do our own byte swapping since the driver doesnt do it for us
@@ -273,12 +289,7 @@
             }
 
             dma_buffer->unmap();
+            //Its highly likely that this surface will be reused, so we just leave resources in place
 
-            //Cleanup
-            //These sections are usually one-use only so we destroy system resources
-            //TODO: Recycle dma buffers
-            release_dma_resources();
-            vram_texture = nullptr; //Let m_rtts handle lifetime management
         }
     };
@@ -333,7 +344,7 @@
            for (auto &tex : m_cache)
            {
                if (tex.is_dirty()) continue;
-               if (!tex.is_flushable()) continue;
+               if (!tex.is_flushable() && !tex.is_flushed()) continue;
 
                if (tex.matches(address, range))
                    return &tex;
@@ -529,15 +540,16 @@
        void lock_memory_region(vk::render_target* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height)
        {
            cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
-           region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false);
 
            if (!region.is_locked())
            {
                region.reset(memory_address, memory_size);
-               region.protect(utils::protection::no);
                region.set_dirty(false);
                texture_cache_range = region.get_min_max(texture_cache_range);
            }
 
+           region.protect(utils::protection::no);
+           region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false);
        }
 
        void flush_memory_to_cache(const u32 memory_address, const u32 memory_size, vk::command_buffer&cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
@@ -554,6 +566,20 @@
            region->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue);
        }
 
+       bool address_is_flushable(u32 address)
+       {
+           for (auto &tex : m_cache)
+           {
+               if (tex.is_dirty()) continue;
+               if (!tex.is_flushable()) continue;
+
+               if (tex.overlaps(address))
+                   return true;
+           }
+
+           return false;
+       }
+
        bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
        {
            if (address < texture_cache_range.first ||
@@ -584,8 +610,6 @@
 
                //TODO: Map basic host_visible memory without coherent constraint
                tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue);
-               tex.set_dirty(true);
-
                response = true;
            }
        }
@@ -607,6 +631,7 @@
            auto &tex = m_cache[i];
 
            if (tex.is_dirty()) continue;
+           if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
 
            auto overlapped = tex.overlaps_page(trampled_range, address);
            if (std::get<0>(overlapped))
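When manage_cb_lifetime is true, copy_texture() now opens the closed secondary command buffer itself, records the copy, submits, hard-waits the fence and resets the pool, so a flush can run outside the primary command stream. The shape of that one-shot pattern, distilled into a hypothetical free function (the record step and handles are placeholders; error handling goes through the same CHECK_RESULT macro):

    void submit_one_shot(VkDevice dev, VkCommandPool pool, VkCommandBuffer cb,
                         VkQueue queue, VkFence fence)
    {
        VkCommandBufferBeginInfo begin = {};
        begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
        begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
        CHECK_RESULT(vkBeginCommandBuffer(cb, &begin));

        //...record the transfer here, e.g. vkCmdCopyImageToBuffer(...)...

        CHECK_RESULT(vkEndCommandBuffer(cb));

        VkSubmitInfo submit = {};
        submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
        submit.commandBufferCount = 1;
        submit.pCommandBuffers = &cb;
        CHECK_RESULT(vkQueueSubmit(queue, 1, &submit, fence));

        //This hard wait is the synchronous cost the commit message wants to remove eventually
        CHECK_RESULT(vkWaitForFences(dev, 1, &fence, VK_TRUE, UINT64_MAX));
        CHECK_RESULT(vkResetFences(dev, 1, &fence));
        CHECK_RESULT(vkResetCommandPool(dev, pool, 0));
    }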
@@ -136,12 +136,13 @@ namespace rsx
            locked_address_range = align(base + length, 4096) - locked_address_base;
 
            protection = utils::protection::rw;
 
            locked = false;
        }
 
        void protect(utils::protection prot)
        {
+           if (prot == protection) return;
+
            utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
            protection = prot;
            locked = prot != utils::protection::rw;
@@ -149,7 +150,8 @@
 
        void unprotect()
        {
-           return protect(utils::protection::rw);
+           protect(utils::protection::rw);
+           locked = false;
        }
 
        bool overlaps(std::pair<u32, u32> range)
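The early-out added to protect() makes redundant transitions free: re-protecting a section that already has the requested protection no longer calls utils::memory_protect, and unprotect() now also clears the locked flag explicitly. A small usage sketch of the intended call sequence (the address and size are hypothetical, and direct construction of a buffered_section is assumed for illustration):

    rsx::buffered_section section;
    section.reset(0xC0000000, 4096);          //hypothetical RSX-visible page
    section.protect(utils::protection::no);   //lock: any CPU access now faults
    section.protect(utils::protection::no);   //no-op after this commit (early-out)
    section.unprotect();                      //back to rw and marked unlocked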