vulkan: Use a parallel cb for texture cache ops; bug fixes

vk: More surface fixes and debug stuff

vk: Crude thread sync implementation to prevent cb desync crashes due to resource usage

fix build

more fixes

vulkan: Do not flush command queue if address cannot be flushed

vk: More fixes for accuracy. Needs optimizations

vk: Batch all flush-to-buffer operations in the non-critical path
- More work is needed to make queue submission asynchronous
kd-11 2017-04-21 22:55:05 +03:00
parent fd754a4ddc
commit 2b19031206
5 changed files with 169 additions and 76 deletions
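
The core of the "crude thread sync" described above is a busy-wait handshake: a thread that faults on a protected texture range asks the RSX thread to submit its open primary command buffer, spins until that has happened, and then performs the flush on a dedicated secondary command buffer under a mutex, while the RSX thread polls for such requests between commands. The sketch below is a simplified, self-contained illustration of that handshake; the member names mirror the ones added in this commit, but the type and helper functions (renderer_sync_stub, flush_on_secondary_cb, submit_and_reopen_primary_cb) are hypothetical stand-ins, not RPCS3 code.

    #include <atomic>
    #include <mutex>
    #include <thread>

    // Simplified stand-in for the thread sync added in this commit; not the actual VKGSRender class.
    struct renderer_sync_stub
    {
        std::atomic<bool> flush_commands{ false }; // mirrors m_flush_commands
        std::atomic<int>  queued_threads{ 0 };     // mirrors m_queued_threads
        std::mutex        secondary_cb_guard;      // mirrors m_secondary_cb_guard
        std::thread::id   rsx_thread;              // set on the render thread, cf. on_init_thread()

        // Called from whatever thread faults on a protected texture range (cf. on_access_violation()).
        bool flush_faulting_address()
        {
            if (std::this_thread::get_id() != rsx_thread)
            {
                // Ask the render thread to submit its open primary command buffer,
                // then spin until it has done so.
                flush_commands = true;
                queued_threads++;
                while (flush_commands)
                    std::this_thread::yield();

                std::lock_guard<std::mutex> lock(secondary_cb_guard);
                const bool ok = flush_on_secondary_cb(); // record + submit on the secondary cb
                queued_threads--;
                return ok;
            }

            std::lock_guard<std::mutex> lock(secondary_cb_guard);
            return flush_on_secondary_cb();
        }

        // Polled by the render thread between commands (cf. do_local_task()).
        void service_flush_requests()
        {
            if (flush_commands)
            {
                submit_and_reopen_primary_cb(); // close, submit, wait on fence, reset pool, reopen
                flush_commands = false;
                while (queued_threads)          // let queued readers finish on the secondary cb
                    std::this_thread::yield();
            }
        }

        // Placeholders standing in for the Vulkan work the real renderer does.
        bool flush_on_secondary_cb() { return true; }
        void submit_and_reopen_primary_cb() {}
    };

In the actual diff below, the faulting-thread side lives in VKGSRender::on_access_violation() and the polling side in the new VKGSRender::do_local_task(), with the flush itself routed through vk::cached_texture_section::copy_texture() using the new manage_cb_lifetime path.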

@@ -493,6 +493,11 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
     //create command buffer...
     m_command_buffer_pool.create((*m_device));
     m_command_buffer.create(m_command_buffer_pool);
+
+    //Create secondary command_buffer for parallel operations
+    m_secondary_command_buffer_pool.create((*m_device));
+    m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
+
     open_command_buffer();
 
     for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
@@ -620,6 +625,9 @@ VKGSRender::~VKGSRender()
     m_command_buffer.destroy();
     m_command_buffer_pool.destroy();
 
+    m_secondary_command_buffer.destroy();
+    m_secondary_command_buffer_pool.destroy();
+
     //Device handles/contexts
     m_swap_chain->destroy();
     m_thread_context.close();
@@ -632,7 +640,29 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
     if (is_writing)
         return m_texture_cache.invalidate_address(address);
     else
-        return m_texture_cache.flush_address(address, *m_device, m_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+    {
+        if (!m_texture_cache.address_is_flushable(address))
+            return false;
+
+        if (std::this_thread::get_id() != rsx_thread)
+        {
+            //TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
+            m_flush_commands = true;
+            m_queued_threads++;
+
+            //This is awful!
+            while (m_flush_commands);
+
+            std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+            bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+
+            m_queued_threads--;
+            return status;
+        }
+
+        std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+        return m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+    }
 
     return false;
 }
@@ -646,7 +676,9 @@ void VKGSRender::begin()
 {
     std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
 
-    close_and_submit_command_buffer({}, m_submit_fence);
+    //??Should we wait for the queue to actually render to the GPU? or just flush the queue?
+    //Needs investigation to determine what drivers expect here, bottom_of_pipe is guaranteed to work, but will be too slow
+    close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
     CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
 
     vkResetDescriptorPool(*m_device, descriptor_pool, 0);
@@ -833,9 +865,9 @@ void VKGSRender::end()
     std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
     m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
 
-    rsx::thread::end();
     copy_render_targets_to_dma_location();
+    rsx::thread::end();
 }
 
 void VKGSRender::set_viewport()
@@ -875,6 +907,8 @@ void VKGSRender::on_init_thread()
     GSRender::on_init_thread();
     m_attrib_ring_info.init(8 * RING_BUFFER_SIZE);
     m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
+
+    rsx_thread = std::this_thread::get_id();
 }
 
 void VKGSRender::on_exit()
@@ -987,13 +1021,6 @@ void VKGSRender::clear_surface(u32 mask)
 void VKGSRender::sync_at_semaphore_release()
 {
-    close_and_submit_command_buffer({}, m_submit_fence);
-    CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
-
-    CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
-    CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
-    open_command_buffer();
-
     m_flush_draw_buffers = true;
 }
@@ -1002,6 +1029,13 @@ void VKGSRender::copy_render_targets_to_dma_location()
     if (!m_flush_draw_buffers)
         return;
 
+    if (!g_cfg_rsx_write_color_buffers && !g_cfg_rsx_write_depth_buffer)
+        return;
+
+    //TODO: Make this asynchronous. Should be similar to a glFlush() but in this case it's similar to glFinish
+    //This is due to all the hard waits for fences
+    //TODO: Use a command buffer array to allow explicit draw command tracking
+
     if (g_cfg_rsx_write_color_buffers)
     {
         for (u8 index = 0; index < rsx::limits::color_buffers_count; index++)
@@ -1023,7 +1057,28 @@
         }
     }
 
-    m_flush_draw_buffers = false;
+    close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
+
+    CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
+    CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
+    open_command_buffer();
+}
+
+void VKGSRender::do_local_task()
+{
+    if (m_flush_commands)
+    {
+        close_and_submit_command_buffer({}, m_submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
+
+        CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
+        CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
+        open_command_buffer();
+
+        m_flush_commands = false;
+        while (m_queued_threads);
+    }
 }
 
 bool VKGSRender::do_method(u32 cmd, u32 arg)
@@ -1294,17 +1349,16 @@ void VKGSRender::write_buffers()
 {
 }
 
-void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence)
+void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
 {
     CHECK_RESULT(vkEndCommandBuffer(m_command_buffer));
 
-    VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
     VkCommandBuffer cmd = m_command_buffer;
 
     VkSubmitInfo infos = {};
     infos.commandBufferCount = 1;
     infos.pCommandBuffers = &cmd;
-    infos.pWaitDstStageMask = &pipe_stage_flags;
+    infos.pWaitDstStageMask = &pipeline_stage_flags;
     infos.pWaitSemaphores = semaphores.data();
     infos.waitSemaphoreCount = static_cast<uint32_t>(semaphores.size());
     infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

@@ -11,6 +11,7 @@
 #include "VKProgramBuffer.h"
 #include "../GCM.h"
 #include "../rsx_utils.h"
+#include <atomic>
 
 #pragma comment(lib, "VKstatic.1.lib")
@@ -60,6 +61,9 @@ private:
     vk::command_pool m_command_buffer_pool;
     vk::command_buffer m_command_buffer;
+    std::mutex m_secondary_cb_guard;
+    vk::command_pool m_secondary_command_buffer_pool;
+    vk::command_buffer m_secondary_command_buffer;
 
     std::array<VkRenderPass, 120> m_render_passes;
     VkDescriptorSetLayout descriptor_layouts;
@@ -86,7 +90,13 @@ private:
     rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
     rsx::gcm_framebuffer_info m_depth_surface_info;
 
     bool m_flush_draw_buffers = false;
+    std::atomic<bool> m_flush_commands = false;
+    std::atomic<int> m_queued_threads = 0;
+
+    std::thread::id rsx_thread;
+
 public:
     VKGSRender();
@@ -94,7 +104,7 @@ public:
 private:
     void clear_surface(u32 mask);
-    void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence);
+    void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
     void open_command_buffer();
     void sync_at_semaphore_release();
     void prepare_rtts();
@@ -117,5 +127,7 @@ protected:
     bool do_method(u32 id, u32 arg) override;
     void flip(int buffer) override;
 
+    void do_local_task() override;
+
     bool on_access_violation(u32 address, bool is_writing) override;
 };

@@ -32,7 +32,7 @@ namespace rsx
 namespace vk
 {
-#define CHECK_RESULT(expr) do { VkResult _res = (expr); if (_res != VK_SUCCESS) fmt::throw_exception("Assertion failed! Result is %Xh", (s32)_res); } while (0)
+#define CHECK_RESULT(expr) { VkResult _res = (expr); if (_res != VK_SUCCESS) fmt::throw_exception("Assertion failed! Result is %Xh" HERE, (s32)_res); }
 
     VKAPI_ATTR void *VKAPI_CALL mem_realloc(void *pUserData, void *pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);
     VKAPI_ATTR void *VKAPI_CALL mem_alloc(void *pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope);

@@ -28,6 +28,14 @@
         cached_texture_section() {}
 
+        void reset(u32 base, u32 length)
+        {
+            if (length > cpu_address_range)
+                release_dma_resources();
+
+            rsx::buffered_section::reset(base, length);
+        }
+
         void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 native_pitch = 0, bool managed=true)
         {
             width = w;
@@ -38,8 +46,7 @@
             uploaded_image_view.reset(view);
             vram_texture = image;
 
-            if (managed)
-                managed_texture.reset(image);
+            if (managed) managed_texture.reset(image);
 
             //TODO: Properly compute these values
             this->native_pitch = native_pitch;
@@ -105,16 +112,18 @@
         bool is_flushable() const
         {
-            if (protection == utils::protection::ro || protection == utils::protection::no)
-                return true;
-
-            if (uploaded_image_view.get() == nullptr && vram_texture != nullptr)
-                return true;
-
-            return false;
+            //This section is active and can be flushed to cpu
+            return (protection == utils::protection::no);
         }
 
-        void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue, VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
+        bool is_flushed() const
+        {
+            //This memory section was flushable, but a flush has already removed protection
+            return (protection == utils::protection::rw && uploaded_image_view.get() == nullptr && managed_texture.get() == nullptr);
+        }
+
+        void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue,
+                bool manage_cb_lifetime = false, VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
         {
             if (m_device == nullptr)
             {
@@ -130,7 +139,21 @@
             if (dma_buffer.get() == nullptr)
             {
-                dma_buffer.reset(new vk::buffer(*m_device, native_pitch * height, heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
+                dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
+            }
+
+            if (manage_cb_lifetime)
+            {
+                //cb has to be guaranteed to be in a closed state
+                //This function can be called asynchronously
+                VkCommandBufferInheritanceInfo inheritance_info = {};
+                inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
+
+                VkCommandBufferBeginInfo begin_infos = {};
+                begin_infos.pInheritanceInfo = &inheritance_info;
+                begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+                begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+                CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos));
             }
 
             VkBufferImageCopy copyRegion = {};
@@ -147,52 +170,47 @@
             vkCmdCopyImageToBuffer(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &copyRegion);
             change_image_layout(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, layout, subresource_range);
 
-            CHECK_RESULT(vkEndCommandBuffer(cmd));
-            VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+            if (manage_cb_lifetime)
+            {
+                CHECK_RESULT(vkEndCommandBuffer(cmd));
+                VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                 VkCommandBuffer command_buffer = cmd;
 
                 VkSubmitInfo infos = {};
                 infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
                 infos.commandBufferCount = 1;
                 infos.pCommandBuffers = &command_buffer;
                 infos.pWaitDstStageMask = &pipe_stage_flags;
                 infos.pWaitSemaphores = nullptr;
                 infos.waitSemaphoreCount = 0;
 
                 CHECK_RESULT(vkQueueSubmit(submit_queue, 1, &infos, dma_fence));
 
                 //Now we need to restart the command-buffer to restore it to the way it was before...
                 CHECK_RESULT(vkWaitForFences(*m_device, 1, &dma_fence, VK_TRUE, UINT64_MAX));
                 CHECK_RESULT(vkResetCommandPool(*m_device, cmd.get_command_pool(), 0));
                 CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));
-
-            VkCommandBufferInheritanceInfo inheritance_info = {};
-            inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
-
-            VkCommandBufferBeginInfo begin_infos = {};
-            begin_infos.pInheritanceInfo = &inheritance_info;
-            begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-            begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-            CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos));
+            }
         }
 
         template<typename T>
         void do_memory_transfer(void *pixels_dst, void *pixels_src)
         {
+            //LOG_ERROR(RSX, "COPY %d -> %d", native_pitch, pitch);
             if (pitch == native_pitch)
             {
                 if (sizeof T == 1)
-                    memcpy(pixels_dst, pixels_src, native_pitch * height);
+                    memcpy(pixels_dst, pixels_src, cpu_address_range);
                 else
                 {
-                    const u32 block_size = native_pitch * height / sizeof T;
+                    const u32 block_size = width * height;
                     auto typed_dst = (be_t<T> *)pixels_dst;
                     auto typed_src = (T *)pixels_src;
 
-                    for (u8 n = 0; n < block_size; ++n)
-                        typed_dst[n] = typed_src[n];
+                    for (u32 px = 0; px < block_size; ++px)
+                        typed_dst[px] = typed_src[px];
                 }
             }
             else
@@ -203,7 +221,7 @@
                     u8 *typed_src = (u8 *)pixels_src;
 
                     //TODO: Scaling
-                    for (int row = 0; row < height; ++row)
+                    for (u16 row = 0; row < height; ++row)
                     {
                         memcpy(typed_dst, typed_src, native_pitch);
                         typed_dst += pitch;
@@ -218,9 +236,9 @@
                     auto typed_dst = (be_t<T> *)pixels_dst;
                    auto typed_src = (T *)pixels_src;
 
-                    for (int row = 0; row < height; ++row)
+                    for (u16 row = 0; row < height; ++row)
                     {
-                        for (int px = 0; px < width; ++px)
+                        for (u16 px = 0; px < width; ++px)
                         {
                             typed_dst[px] = typed_src[px];
                         }
@@ -240,15 +258,13 @@
             if (dma_fence == VK_NULL_HANDLE || dma_buffer.get() == nullptr)
             {
                 LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
-                copy_texture(cmd, heap_index, submit_queue, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-                verify (HERE), (dma_fence != VK_NULL_HANDLE && dma_buffer.get());
+                copy_texture(cmd, heap_index, submit_queue, true, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
             }
 
             protect(utils::protection::rw);
 
             //TODO: Image scaling, etc
-            void* pixels_src = dma_buffer->map(0, VK_WHOLE_SIZE);
+            void* pixels_src = dma_buffer->map(0, cpu_address_range);
             void* pixels_dst = vm::base(cpu_address_base);
 
             //We have to do our own byte swapping since the driver doesn't do it for us
@@ -273,12 +289,7 @@
             }
 
             dma_buffer->unmap();
 
-            //Cleanup
-            //These sections are usually one-use only so we destroy system resources
-            //TODO: Recycle dma buffers
-            release_dma_resources();
-
-            vram_texture = nullptr; //Let m_rtts handle lifetime management
+            //It's highly likely that this surface will be reused, so we just leave resources in place
         }
     };
@@ -333,7 +344,7 @@ namespace vk
             for (auto &tex : m_cache)
             {
                 if (tex.is_dirty()) continue;
-                if (!tex.is_flushable()) continue;
+                if (!tex.is_flushable() && !tex.is_flushed()) continue;
 
                 if (tex.matches(address, range))
                     return &tex;
@@ -529,15 +540,16 @@
         void lock_memory_region(vk::render_target* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height)
         {
             cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
-            region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false);
 
             if (!region.is_locked())
             {
                 region.reset(memory_address, memory_size);
-                region.protect(utils::protection::no);
                 region.set_dirty(false);
                 texture_cache_range = region.get_min_max(texture_cache_range);
             }
+
+            region.protect(utils::protection::no);
+            region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false);
         }
 
         void flush_memory_to_cache(const u32 memory_address, const u32 memory_size, vk::command_buffer&cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
@@ -554,6 +566,20 @@
             region->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue);
         }
 
+        bool address_is_flushable(u32 address)
+        {
+            for (auto &tex : m_cache)
+            {
+                if (tex.is_dirty()) continue;
+                if (!tex.is_flushable()) continue;
+
+                if (tex.overlaps(address))
+                    return true;
+            }
+
+            return false;
+        }
+
         bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
         {
             if (address < texture_cache_range.first ||
@@ -584,8 +610,6 @@
                     //TODO: Map basic host_visible memory without coherent constraint
                     tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue);
-                    tex.set_dirty(true);
-
                     response = true;
                 }
             }
@@ -607,6 +631,7 @@
                 auto &tex = m_cache[i];
                 if (tex.is_dirty()) continue;
+                if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
 
                 auto overlapped = tex.overlaps_page(trampled_range, address);
                 if (std::get<0>(overlapped))

@@ -136,12 +136,13 @@ namespace rsx
             locked_address_range = align(base + length, 4096) - locked_address_base;
             protection = utils::protection::rw;
             locked = false;
         }
 
         void protect(utils::protection prot)
         {
+            if (prot == protection) return;
             utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
             protection = prot;
             locked = prot != utils::protection::rw;
@@ -149,7 +150,8 @@
         void unprotect()
         {
-            return protect(utils::protection::rw);
+            protect(utils::protection::rw);
+            locked = false;
         }
 
         bool overlaps(std::pair<u32, u32> range)