vk/gl: Synchronization improvements

- Properly wait for the buffer transfer operation to finish before map/readback!
- Change VkFence to VkEvent, which behaves more like a GL fence and is what is actually needed here.
- Implement the supporting methods and functions.
- Do not defeat the fence by waiting on it immediately after copying to the DMA buffer; wait at map/readback time instead. (A condensed sketch of the resulting flow follows the commit details below.)
kd-11 2019-03-14 15:27:50 +03:00 committed by kd-11
parent 85cb703633
commit a49a0f2a86
7 changed files with 99 additions and 87 deletions
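
The gist of the change: instead of stalling on a VkFence right after recording the readback copy, the GPU now signals a VkEvent once the transfer has finished, and the host only waits for (and then re-arms) that event when the DMA buffer is actually mapped. The GL backend gets the same treatment, with the gl::fence wait moving out of copy_texture() and into map_synchronized(). The hunks below show the real implementation; what follows is only a condensed sketch of the pattern with hypothetical names (record_readback, map_readback, dma_event), and it glosses over the managed-command-buffer path, which signals the event from the host with vkSetEvent after a fenced submit.

#include <vulkan/vulkan.h>
#include <immintrin.h>

// GPU side: record the readback copy, then let the GPU signal the event when the transfer is done.
void record_readback(VkCommandBuffer cmd, VkImage src, VkBuffer dma_buffer,
                     const VkBufferImageCopy& region, VkEvent dma_event)
{
    vkCmdCopyImageToBuffer(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer, 1, &region);
    vkCmdSetEvent(cmd, dma_event, VK_PIPELINE_STAGE_TRANSFER_BIT);
}

// Host side: wait only when the data is actually needed, then re-arm the event and map the buffer.
void* map_readback(VkDevice dev, VkDeviceMemory dma_memory, VkEvent dma_event)
{
    while (vkGetEventStatus(dev, dma_event) != VK_EVENT_SET)
        _mm_pause(); // busy-wait; the real helper (vk::wait_for_event) also handles errors and a timeout

    vkResetEvent(dev, dma_event);

    void* ptr = nullptr;
    vkMapMemory(dev, dma_memory, 0, VK_WHOLE_SIZE, 0, &ptr);
    return ptr;
}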

@@ -2670,6 +2670,7 @@ namespace rsx
 else
 {
     verify(HERE), dst_is_render_target;
+    dst_subres.surface->on_write();
 }

 if (rsx::get_resolution_scale_percent() != 100)

@@ -61,7 +61,6 @@ namespace gl
 texture::format format = texture::format::rgba;
 texture::type type = texture::type::ubyte;
-rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample;

 u8 get_pixel_size(texture::format fmt_, texture::type type_)
 {
@@ -157,7 +156,7 @@ namespace gl
 using baseclass::cached_texture_section;

 void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only,
-    gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes)
+    gl::texture::format gl_format = gl::texture::format::rgba, gl::texture::type gl_type = gl::texture::type::ubyte, bool swap_bytes = false)
 {
     auto new_texture = static_cast<gl::viewable_image*>(image);
     ASSERT(!exists() || !is_managed() || vram_texture == new_texture);
@@ -166,11 +165,9 @@ namespace gl
 if (read_only)
 {
     managed_texture.reset(vram_texture);
-    aa_mode = rsx::surface_antialiasing::center_1_sample;
 }
 else
 {
-    aa_mode = static_cast<gl::render_target*>(image)->read_aa_mode;
     ASSERT(managed_texture.get() == nullptr);
 }
@@ -193,28 +190,6 @@ namespace gl
     baseclass::on_section_resources_created();
 }

-void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps, u16 pitch)
-{
-    ASSERT(!exists() || !is_managed() || vram_texture == image);
-    verify(HERE), pitch;
-
-    //Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle
-    this->width = width;
-    this->height = height;
-    this->depth = depth;
-    this->mipmaps = mipmaps;
-
-    managed_texture.reset(image);
-    vram_texture = image;
-
-    rsx_pitch = pitch;
-    real_pitch = 0;
-
-    // Notify baseclass
-    baseclass::on_section_resources_created();
-}
-
 void set_dimensions(u32 width, u32 height, u32 /*depth*/, u32 pitch)
 {
     this->width = width;
@@ -264,17 +239,20 @@ namespace gl
 u32 real_width = width;
 u32 real_height = height;

-switch (aa_mode)
-{
-case rsx::surface_antialiasing::center_1_sample:
-    break;
-case rsx::surface_antialiasing::diagonal_centered_2_samples:
-    real_width *= 2;
-    break;
-default:
-    real_width *= 2;
-    real_height *= 2;
-    break;
+if (context == rsx::texture_upload_context::framebuffer_storage)
+{
+    switch (static_cast<gl::render_target*>(vram_texture)->read_aa_mode)
+    {
+    case rsx::surface_antialiasing::center_1_sample:
+        break;
+    case rsx::surface_antialiasing::diagonal_centered_2_samples:
+        real_width *= 2;
+        break;
+    default:
+        real_width *= 2;
+        real_height *= 2;
+        break;
+    }
 }

 areai src_area = { 0, 0, 0, 0 };
@@ -376,16 +354,13 @@ namespace gl
     verify(HERE), cmd.drv;
     copy_texture(cmd, blocking);
-
-    if (blocking)
-    {
-        m_fence.wait_for_signal();
-    }
 }

 void* map_synchronized(u32 offset, u32 size)
 {
-    AUDIT(synchronized);
+    AUDIT(synchronized && !m_fence.is_empty());
+    m_fence.wait_for_signal();

     verify(HERE), (offset + size) <= pbo_size;
     glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
@@ -894,7 +869,7 @@ namespace gl
 cached.set_image_type(type);
 cached.set_gcm_format(gcm_format);

-cached.create_read_only(image, width, height, depth, mipmaps, pitch);
+cached.create(width, height, depth, mipmaps, image, pitch, true);
 cached.set_dirty(false);

 if (context != rsx::texture_upload_context::blit_engine_dst)

@@ -871,10 +871,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 const bool is_rsxthr = std::this_thread::get_id() == m_rsx_thread;
 bool has_queue_ref = false;

-u64 sync_timestamp = 0ull;
-for (const auto& tex : result.sections_to_flush)
-    sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp());
-
 if (!is_rsxthr)
 {
     //Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
@@ -882,7 +878,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
     std::lock_guard lock(m_flush_queue_mutex);

-    m_flush_requests.post(sync_timestamp == 0ull);
+    m_flush_requests.post(false);
     has_queue_ref = true;
 }
 else if (!vk::is_uninterruptible())
@@ -895,33 +891,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
     //LOG_ERROR(RSX, "Fault in uninterruptible code!");
 }

-if (sync_timestamp > 0)
-{
-    // Wait for earliest cb submitted after the sync timestamp to finish
-    command_buffer_chunk *target_cb = nullptr;
-    for (auto &cb : m_primary_cb_list)
-    {
-        if (cb.last_sync >= sync_timestamp)
-        {
-            if (!cb.pending)
-            {
-                target_cb = nullptr;
-                break;
-            }
-
-            if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
-            {
-                target_cb = &cb;
-            }
-        }
-    }
-
-    if (target_cb)
-    {
-        target_cb->wait(GENERAL_WAIT_TIMEOUT);
-    }
-}
-
 if (has_queue_ref)
 {
     //Wait for the RSX thread to process request if it hasn't already
@@ -3520,9 +3489,18 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
 //Verify enough memory exists before attempting to handle data transfer
 check_heap_status();

+const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes();
 if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
 {
     m_samplers_dirty.store(true);
+    m_current_command_buffer->flags |= cb_has_blit_transfer;
+
+    if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count)
+    {
+        // A speculative write happened, flush while the dma resource is valid
+        // TODO: Deeper investigation as to why this can trigger problems
+        flush_command_queue();
+    }
+
     return true;
 }

@@ -50,7 +50,8 @@ extern u64 get_system_time();
 enum command_buffer_data_flag
 {
-    cb_has_occlusion_task = 1
+    cb_has_occlusion_task = 1,
+    cb_has_blit_transfer = 2
 };

 struct command_buffer_chunk: public vk::command_buffer

@@ -654,6 +654,42 @@ namespace vk
     }
 }

+VkResult wait_for_event(VkEvent event, u64 timeout)
+{
+    u64 t = 0;
+    while (true)
+    {
+        switch (const auto status = vkGetEventStatus(*g_current_renderer, event))
+        {
+        case VK_EVENT_SET:
+            return VK_SUCCESS;
+        case VK_EVENT_RESET:
+            break;
+        default:
+            die_with_error(HERE, status);
+            return status;
+        }
+
+        if (timeout)
+        {
+            if (!t)
+            {
+                t = get_system_time();
+                continue;
+            }
+
+            if ((get_system_time() - t) > timeout)
+            {
+                LOG_ERROR(RSX, "[vulkan] vk::wait_for_event has timed out!");
+                return VK_TIMEOUT;
+            }
+        }
+
+        //std::this_thread::yield();
+        _mm_pause();
+    }
+}
+
 void die_with_error(const char* faulting_addr, VkResult error_code)
 {
     std::string error_message;
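
Note that core Vulkan has no blocking host-side wait for a VkEvent (only vkGetEventStatus), which is why this helper polls instead of calling an event equivalent of vkWaitForFences. A minimal usage sketch, with names assumed from the surrounding hunks (cmd, dma_fence, m_device), pairs a GPU-side signal with the host-side wait:

vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_TRANSFER_BIT);         // GPU sets the event after the transfer stage
// ... submit cmd ...
if (vk::wait_for_event(dma_fence, GENERAL_WAIT_TIMEOUT) == VK_SUCCESS) // host polls until set (or timeout)
    vkResetEvent(*m_device, dma_fence);                                // re-arm for the next readback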

@@ -181,6 +181,7 @@ namespace vk
 // Fence reset with driver workarounds in place
 void reset_fence(VkFence *pFence);
 VkResult wait_for_fence(VkFence pFence, u64 timeout = 0ull);
+VkResult wait_for_event(VkEvent pEvent, u64 timeout = 0ull);

 void die_with_error(const char* faulting_addr, VkResult error_code);

@@ -36,7 +36,7 @@ namespace vk
 std::unique_ptr<vk::viewable_image> managed_texture = nullptr;

 //DMA relevant data
-VkFence dma_fence = VK_NULL_HANDLE;
+VkEvent dma_fence = VK_NULL_HANDLE;
 vk::render_device* m_device = nullptr;
 vk::viewable_image *vram_texture = nullptr;
 std::unique_ptr<vk::buffer> dma_buffer;
@@ -82,9 +82,9 @@ namespace vk
 {
     dma_buffer.reset();

-    if (dma_fence != nullptr)
+    if (dma_fence != VK_NULL_HANDLE)
     {
-        vkDestroyFence(*m_device, dma_fence, nullptr);
+        vkDestroyEvent(*m_device, dma_fence, nullptr);
         dma_fence = VK_NULL_HANDLE;
     }
 }
@@ -164,9 +164,9 @@ namespace vk
 if (dma_fence == VK_NULL_HANDLE)
 {
-    VkFenceCreateInfo createInfo = {};
-    createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-    vkCreateFence(*m_device, &createInfo, nullptr, &dma_fence);
+    VkEventCreateInfo createInfo = {};
+    createInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
+    vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence);
 }

 if (dma_buffer.get() == nullptr)
@@ -297,16 +297,32 @@ namespace vk
 if (manage_cb_lifetime)
 {
+    VkFence submit_fence;
+    VkFenceCreateInfo create_info{};
+    create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+    vkCreateFence(*m_device, &create_info, nullptr, &submit_fence);
+
     cmd.end();
-    cmd.submit(submit_queue, {}, dma_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);

-    //Now we need to restart the command-buffer to restore it to the way it was before...
-    vk::wait_for_fence(dma_fence);
-    vk::reset_fence(&dma_fence);
+    // Now we need to restart the command-buffer to restore it to the way it was before...
+    vk::wait_for_fence(submit_fence);
     CHECK_RESULT(vkResetCommandBuffer(cmd, 0));

+    // Cleanup
+    vkDestroyFence(*m_device, submit_fence, nullptr);
+    vkSetEvent(*m_device, dma_fence);
+
     if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
+    {
+        // If this is a primary CB, restart it
         cmd.begin();
+    }
+}
+else
+{
+    // Only used when doing speculation
+    verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
+    vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
 }

 synchronized = true;
@@ -333,6 +349,10 @@ namespace vk
 {
     AUDIT(synchronized);

+    // Synchronize, reset dma_fence after waiting
+    vk::wait_for_event(dma_fence, GENERAL_WAIT_TIMEOUT);
+    vkResetEvent(*m_device, dma_fence);
+
     return dma_buffer->map(offset, size);
 }