rsx: improve memory coherency

- Avoid tagging and rely on read/write barriers and the dirty-flag mechanism instead. Change detection is done with a weak 8-byte memory test (sketched below)
- Tagging introduces new data into guest memory, which breaks applications with race conditions where a tag can overwrite freshly flushed data
kd-11 2019-01-05 13:12:36 +03:00 committed by kd-11
parent 89c9c54743
commit 52ac0a901a
6 changed files with 49 additions and 41 deletions
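
For context, the "weak 8-byte memory test" mentioned above boils down to sampling the first eight bytes at a surface's base address and later comparing them against guest memory. The following standalone sketch illustrates the idea only; it is not RPCS3 code, and the names (memory_probe, guest_memory) are invented for the example:

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint8_t guest_memory[64];      // stand-in for the emulated address space

// Invented illustration of the descriptor's memory_tag_address / memory_tag_sample pair
struct memory_probe
{
    uint32_t address = 0;             // start of the surface's memory block
    uint64_t sample = 0;              // last known contents of the first 8 bytes

    void queue(uint32_t addr) { address = addr; }

    // Take a fresh sample; called whenever CPU memory and GPU surface are known to agree
    void sync() { std::memcpy(&sample, guest_memory + address, sizeof(sample)); }

    // Weak test: if the bytes still match the sample, assume the block was not rewritten.
    // A write that happens to leave these 8 bytes unchanged goes undetected, hence "weak".
    bool test() const
    {
        uint64_t current;
        std::memcpy(&current, guest_memory + address, sizeof(current));
        return current == sample;
    }
};

int main()
{
    memory_probe probe;
    probe.queue(16);
    probe.sync();
    std::printf("untouched: %d\n", probe.test());   // 1 - memory still matches the sample

    guest_memory[16] = 0xff;                        // CPU overwrites the surface
    std::printf("modified:  %d\n", probe.test());   // 0 - surface contents must be reloaded
}

Because the test can miss writes that keep those eight bytes intact, the commit still leans on the dirty flag and on read/write barriers for correctness.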


@@ -92,10 +92,10 @@ namespace rsx
     struct render_target_descriptor
     {
         u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
-        u32 tag_address = 0;
+        u32 memory_tag_address = 0u; // memory address of the start of the ROP block
+        u64 memory_tag_sample = 0ull; // memory sample taken at the memory_tag_address for change testing
         bool dirty = false;
-        bool needs_tagging = false;

         image_storage_type old_contents = nullptr;
         rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample;

@@ -119,31 +119,26 @@ namespace rsx
             write_aa_mode = read_aa_mode = rsx::surface_antialiasing::center_1_sample;
         }

-        void tag()
-        {
-            auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
-            *ptr = tag_address;
-            needs_tagging = false;
-        }
-
-        bool test()
+        bool test() const
         {
-            if (needs_tagging && dirty)
+            if (dirty)
             {
                 // TODO
                 // Should RCB or mem-sync (inherit previous mem) to init memory
                 LOG_TODO(RSX, "Resource used before memory initialization");
             }

-            auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
-            return (*ptr == tag_address);
+            return (memory_tag_sample == *vm::get_super_ptr<u64>(memory_tag_address));
         }

         void queue_tag(u32 address)
         {
-            tag_address = address;
-            needs_tagging = true;
+            memory_tag_address = address;
+        }
+
+        void sync_tag()
+        {
+            memory_tag_sample = *vm::get_super_ptr<u64>(memory_tag_address);
         }

         void on_write(u64 write_tag = 0)

@@ -154,10 +149,8 @@ namespace rsx
                 last_use_tag = write_tag;
             }

-            if (needs_tagging)
-            {
-                tag();
-            }
+            // Tag unconditionally without introducing new data
+            sync_tag();

             read_aa_mode = write_aa_mode;
             dirty = false;
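
The key behavioural difference in the hunks above: the removed tag() wrote a sentinel into guest memory, while sync_tag() only reads it, which is why on_write() can now "tag unconditionally without introducing new data". A standalone sketch (not engine code; guest_memory and the literal values are invented) of the race described in the second commit-message bullet:

#include <cstdint>
#include <cstdio>

static uint64_t guest_memory[8];                     // stand-in for the emulated address space

int main()
{
    constexpr uint32_t addr = 2;

    // The texture cache has just flushed rendered data back for the guest to read.
    guest_memory[addr] = 0xDEADBEEFCAFEBABEull;

    // Old scheme: tag() stored a sentinel at the surface base, clobbering that data.
    guest_memory[addr] = addr;
    std::printf("after tag():      0x%016llX (flushed data lost)\n",
                static_cast<unsigned long long>(guest_memory[addr]));

    // New scheme: sync_tag() only samples memory, so the flushed data is left intact
    // and on_write() can tag unconditionally.
    guest_memory[addr] = 0xDEADBEEFCAFEBABEull;
    const uint64_t memory_tag_sample = guest_memory[addr];
    std::printf("after sync_tag(): 0x%016llX (sample 0x%016llX)\n",
                static_cast<unsigned long long>(guest_memory[addr]),
                static_cast<unsigned long long>(memory_tag_sample));
}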


@@ -372,18 +372,6 @@ namespace rsx
             return true;
         }

-        void tag_framebuffer(u32 texaddr)
-        {
-            auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
-            *ptr = texaddr;
-        }
-
-        bool test_framebuffer(u32 texaddr)
-        {
-            auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
-            return *ptr == texaddr;
-        }
-
         /**
          * Section invalidation
          */

@@ -1157,7 +1145,6 @@ namespace rsx
             region.create(width, height, 1, 1, image, pitch, false, std::forward<Args>(extras)...);
             region.reprotect(utils::protection::no, { 0, rsx_range.length() });
-            tag_framebuffer(region.get_section_base());

             region.set_dirty(false);
             region.touch(m_cache_update_tag);

@@ -1703,8 +1690,8 @@ namespace rsx
             // TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
             if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
             {
-                const bool is_active = m_rtts.address_is_bound(texaddr, false);
-                if (texptr->test() || is_active)
+                if (const bool is_active = m_rtts.address_is_bound(texaddr, false);
+                    is_active || texptr->test())
                 {
                     return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
                         tex_width, tex_height, depth, tex_pitch, extended_dimension, false, is_active,

@@ -1719,8 +1706,8 @@ namespace rsx
             if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
             {
-                const bool is_active = m_rtts.address_is_bound(texaddr, true);
-                if (texptr->test() || is_active)
+                if (const bool is_active = m_rtts.address_is_bound(texaddr, false);
+                    is_active || texptr->test())
                 {
                     return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
                         tex_width, tex_height, depth, tex_pitch, extended_dimension, true, is_active,

@@ -1949,6 +1936,8 @@ namespace rsx
             if (src_is_render_target)
             {
+                src_subres.surface->read_barrier(cmd);
+
                 const auto surf = src_subres.surface;
                 auto src_bpp = surf->get_native_pitch() / surf->get_surface_width();
                 auto expected_bpp = src_is_argb8 ? 4 : 2;

@@ -1972,6 +1961,9 @@ namespace rsx
             if (dst_is_render_target)
             {
+                // Full barrier is required in case of partial transfers
+                dst_subres.surface->read_barrier(cmd);
+
                 auto dst_bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width();
                 auto expected_bpp = dst_is_argb8 ? 4 : 2;

                 if (dst_bpp != expected_bpp)

@@ -2411,7 +2403,6 @@ namespace rsx
                     AUDIT(section.get_memory_read_flags() == memory_read_flags::flush_always);
                     section.reprotect(utils::protection::no);
-                    tag_framebuffer(section.get_section_base());
                     update_tag = true;
                 }
             }
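
The read_barrier() calls added in the blit path above guard partial transfers: if a blit overwrites only part of a render target, the untouched remainder must already contain resolved data or stale contents survive. Below is a standalone sketch of that failure mode, assuming, as the comment in the hunk suggests, that the barrier resolves any pending old_contents before the surface is read or partially written; the surface type here is invented for the example and is not RPCS3 code:

#include <algorithm>
#include <cstdio>
#include <vector>

struct surface
{
    std::vector<int> pixels;
    const surface* old_contents = nullptr;   // previous surface this one was created over

    void read_barrier()
    {
        if (old_contents)
        {
            // Inherit the previous contents before anyone reads or partially writes us
            std::copy(old_contents->pixels.begin(), old_contents->pixels.end(), pixels.begin());
            old_contents = nullptr;
        }
    }
};

int main()
{
    surface previous{ std::vector<int>(8, 7) };              // fully written earlier
    surface current{ std::vector<int>(8, 0), &previous };    // re-created over the same memory

    current.read_barrier();                                  // barrier before the transfer
    std::fill(current.pixels.begin(), current.pixels.begin() + 4, 9);   // partial transfer

    for (int p : current.pixels)
        std::printf("%d ", p);                               // 9 9 9 9 7 7 7 7, not 9 9 9 9 0 0 0 0
    std::printf("\n");
}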


@@ -309,6 +309,7 @@ void GLGSRender::end()
         // Program is not ready, skip drawing this
         std::this_thread::yield();
         execute_nop_draw();
+        m_rtts.on_write();
         rsx::thread::end();
         return;
     }


@@ -261,6 +261,12 @@ namespace gl
                 baseclass::on_speculative_flush();
             }

+            if (context == rsx::texture_upload_context::framebuffer_storage)
+            {
+                auto as_rtt = static_cast<gl::render_target*>(vram_texture);
+                if (as_rtt->dirty) as_rtt->read_barrier(cmd);
+            }
+
             if (!pbo_id)
             {
                 init_buffer();

@@ -403,7 +409,6 @@ namespace gl
             glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
             glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
-
             // Shuffle
             bool require_manual_shuffle = false;
             if (pack_unpack_swap_bytes)

@@ -424,7 +429,6 @@ namespace gl
             }
             else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
             {
-
                 //AMD driver bug - cannot use pack_swap_bytes
                 //Manually byteswap texel data
                 switch (type)

@@ -474,6 +478,12 @@ namespace gl
                     }
                 }
             }
+
+            if (context == rsx::texture_upload_context::framebuffer_storage)
+            {
+                // Update memory tag
+                static_cast<gl::render_target*>(vram_texture)->sync_tag();
+            }
         }

         /**
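
Note the ordering in the GL flush path above (the Vulkan path further down mirrors it): the conditional read_barrier() runs before the surface is read back, and sync_tag() runs only after the texel data has been written out, presumably so that the stored sample matches the bytes the flush just produced. A trivial standalone illustration (invented values, not engine code) of why sampling before the write-back would immediately make test() fail:

#include <cstdint>
#include <cstdio>

static uint64_t guest_memory[4];

int main()
{
    uint64_t memory_tag_sample = guest_memory[0];   // sample taken before the flush

    guest_memory[0] = 0x1122334455667788ull;        // flush: texel data written back to guest memory

    std::printf("sample taken before flush matches: %d\n",
                memory_tag_sample == guest_memory[0]);   // 0 - surface would look out of sync

    memory_tag_sample = guest_memory[0];            // sync_tag() after the flush
    std::printf("sample taken after flush matches:  %d\n",
                memory_tag_sample == guest_memory[0]);   // 1 - CPU and GPU copies agree
}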


@@ -1504,6 +1504,7 @@ void VKGSRender::end()
         // Program is not ready, skip drawing this
         std::this_thread::yield();
         execute_nop_draw();
+        m_rtts.on_write();
         rsx::thread::end();
         return;
     }


@@ -180,6 +180,12 @@ namespace vk
                 cmd.begin();
             }

+            if (context == rsx::texture_upload_context::framebuffer_storage)
+            {
+                auto as_rtt = static_cast<vk::render_target*>(vram_texture);
+                if (as_rtt->dirty) as_rtt->read_barrier(cmd);
+            }
+
             vk::image *target = vram_texture;
             real_pitch = vk::get_format_texel_width(vram_texture->info.format) * vram_texture->width();

@@ -333,6 +339,12 @@ namespace vk
         void finish_flush()
         {
             dma_buffer->unmap();
+
+            if (context == rsx::texture_upload_context::framebuffer_storage)
+            {
+                // Update memory tag
+                static_cast<vk::render_target*>(vram_texture)->sync_tag();
+            }
         }