mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-03 13:31:27 +12:00
rsx: improve memory coherency
- Avoid tagging and rely on read/write barriers and the dirty flag mechanism. Testing is done with a weak 8-byte memory test.
- Introducing new data when tagging breaks applications with race conditions where tags can overwrite flushed data.
This commit is contained in:
parent
89c9c54743
commit
52ac0a901a
6 changed files with 49 additions and 41 deletions
|
@ -92,10 +92,10 @@ namespace rsx
|
||||||
struct render_target_descriptor
|
struct render_target_descriptor
|
||||||
{
|
{
|
||||||
u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
|
u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
|
||||||
u32 tag_address = 0;
|
u32 memory_tag_address = 0u; // memory address of the start of the ROP block
|
||||||
|
u64 memory_tag_sample = 0ull; // memory sample taken at the memory_tag_address for change testing
|
||||||
|
|
||||||
bool dirty = false;
|
bool dirty = false;
|
||||||
bool needs_tagging = false;
|
|
||||||
image_storage_type old_contents = nullptr;
|
image_storage_type old_contents = nullptr;
|
||||||
rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample;
|
rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample;
|
||||||
|
|
||||||
|
@ -119,31 +119,26 @@ namespace rsx
|
||||||
write_aa_mode = read_aa_mode = rsx::surface_antialiasing::center_1_sample;
|
write_aa_mode = read_aa_mode = rsx::surface_antialiasing::center_1_sample;
|
||||||
}
|
}
|
||||||
|
|
||||||
void tag()
|
bool test() const
|
||||||
{
|
{
|
||||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
|
if (dirty)
|
||||||
*ptr = tag_address;
|
|
||||||
|
|
||||||
needs_tagging = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool test()
|
|
||||||
{
|
|
||||||
if (needs_tagging && dirty)
|
|
||||||
{
|
{
|
||||||
// TODO
|
// TODO
|
||||||
// Should RCB or mem-sync (inherit previous mem) to init memory
|
// Should RCB or mem-sync (inherit previous mem) to init memory
|
||||||
LOG_TODO(RSX, "Resource used before memory initialization");
|
LOG_TODO(RSX, "Resource used before memory initialization");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(tag_address);
|
return (memory_tag_sample == *vm::get_super_ptr<u64>(memory_tag_address));
|
||||||
return (*ptr == tag_address);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void queue_tag(u32 address)
|
void queue_tag(u32 address)
|
||||||
{
|
{
|
||||||
tag_address = address;
|
memory_tag_address = address;
|
||||||
needs_tagging = true;
|
}
|
||||||
|
|
||||||
|
void sync_tag()
|
||||||
|
{
|
||||||
|
memory_tag_sample = *vm::get_super_ptr<u64>(memory_tag_address);
|
||||||
}
|
}
|
||||||
|
|
||||||
void on_write(u64 write_tag = 0)
|
void on_write(u64 write_tag = 0)
|
||||||
|
@ -154,10 +149,8 @@ namespace rsx
|
||||||
last_use_tag = write_tag;
|
last_use_tag = write_tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (needs_tagging)
|
// Tag unconditionally without introducing new data
|
||||||
{
|
sync_tag();
|
||||||
tag();
|
|
||||||
}
|
|
||||||
|
|
||||||
read_aa_mode = write_aa_mode;
|
read_aa_mode = write_aa_mode;
|
||||||
dirty = false;
|
dirty = false;
|
||||||
|
|
|
@ -372,18 +372,6 @@ namespace rsx
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void tag_framebuffer(u32 texaddr)
|
|
||||||
{
|
|
||||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
|
|
||||||
*ptr = texaddr;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool test_framebuffer(u32 texaddr)
|
|
||||||
{
|
|
||||||
auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
|
|
||||||
return *ptr == texaddr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Section invalidation
|
* Section invalidation
|
||||||
*/
|
*/
|
||||||
|
@ -1157,7 +1145,6 @@ namespace rsx
|
||||||
|
|
||||||
region.create(width, height, 1, 1, image, pitch, false, std::forward<Args>(extras)...);
|
region.create(width, height, 1, 1, image, pitch, false, std::forward<Args>(extras)...);
|
||||||
region.reprotect(utils::protection::no, { 0, rsx_range.length() });
|
region.reprotect(utils::protection::no, { 0, rsx_range.length() });
|
||||||
tag_framebuffer(region.get_section_base());
|
|
||||||
|
|
||||||
region.set_dirty(false);
|
region.set_dirty(false);
|
||||||
region.touch(m_cache_update_tag);
|
region.touch(m_cache_update_tag);
|
||||||
|
@ -1703,8 +1690,8 @@ namespace rsx
|
||||||
// TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
|
// TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
|
||||||
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
|
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
|
||||||
{
|
{
|
||||||
const bool is_active = m_rtts.address_is_bound(texaddr, false);
|
if (const bool is_active = m_rtts.address_is_bound(texaddr, false);
|
||||||
if (texptr->test() || is_active)
|
is_active || texptr->test())
|
||||||
{
|
{
|
||||||
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
|
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
|
||||||
tex_width, tex_height, depth, tex_pitch, extended_dimension, false, is_active,
|
tex_width, tex_height, depth, tex_pitch, extended_dimension, false, is_active,
|
||||||
|
@ -1719,8 +1706,8 @@ namespace rsx
|
||||||
|
|
||||||
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
|
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
|
||||||
{
|
{
|
||||||
const bool is_active = m_rtts.address_is_bound(texaddr, true);
|
if (const bool is_active = m_rtts.address_is_bound(texaddr, false);
|
||||||
if (texptr->test() || is_active)
|
is_active || texptr->test())
|
||||||
{
|
{
|
||||||
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
|
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts,
|
||||||
tex_width, tex_height, depth, tex_pitch, extended_dimension, true, is_active,
|
tex_width, tex_height, depth, tex_pitch, extended_dimension, true, is_active,
|
||||||
|
@ -1949,6 +1936,8 @@ namespace rsx
|
||||||
|
|
||||||
if (src_is_render_target)
|
if (src_is_render_target)
|
||||||
{
|
{
|
||||||
|
src_subres.surface->read_barrier(cmd);
|
||||||
|
|
||||||
const auto surf = src_subres.surface;
|
const auto surf = src_subres.surface;
|
||||||
auto src_bpp = surf->get_native_pitch() / surf->get_surface_width();
|
auto src_bpp = surf->get_native_pitch() / surf->get_surface_width();
|
||||||
auto expected_bpp = src_is_argb8 ? 4 : 2;
|
auto expected_bpp = src_is_argb8 ? 4 : 2;
|
||||||
|
@ -1972,6 +1961,9 @@ namespace rsx
|
||||||
|
|
||||||
if (dst_is_render_target)
|
if (dst_is_render_target)
|
||||||
{
|
{
|
||||||
|
// Full barrier is required in case of partial transfers
|
||||||
|
dst_subres.surface->read_barrier(cmd);
|
||||||
|
|
||||||
auto dst_bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width();
|
auto dst_bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width();
|
||||||
auto expected_bpp = dst_is_argb8 ? 4 : 2;
|
auto expected_bpp = dst_is_argb8 ? 4 : 2;
|
||||||
if (dst_bpp != expected_bpp)
|
if (dst_bpp != expected_bpp)
|
||||||
|
@ -2411,7 +2403,6 @@ namespace rsx
|
||||||
AUDIT(section.get_memory_read_flags() == memory_read_flags::flush_always);
|
AUDIT(section.get_memory_read_flags() == memory_read_flags::flush_always);
|
||||||
|
|
||||||
section.reprotect(utils::protection::no);
|
section.reprotect(utils::protection::no);
|
||||||
tag_framebuffer(section.get_section_base());
|
|
||||||
update_tag = true;
|
update_tag = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -309,6 +309,7 @@ void GLGSRender::end()
|
||||||
// Program is not ready, skip drawing this
|
// Program is not ready, skip drawing this
|
||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
execute_nop_draw();
|
execute_nop_draw();
|
||||||
|
m_rtts.on_write();
|
||||||
rsx::thread::end();
|
rsx::thread::end();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -261,6 +261,12 @@ namespace gl
|
||||||
baseclass::on_speculative_flush();
|
baseclass::on_speculative_flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||||
|
{
|
||||||
|
auto as_rtt = static_cast<gl::render_target*>(vram_texture);
|
||||||
|
if (as_rtt->dirty) as_rtt->read_barrier(cmd);
|
||||||
|
}
|
||||||
|
|
||||||
if (!pbo_id)
|
if (!pbo_id)
|
||||||
{
|
{
|
||||||
init_buffer();
|
init_buffer();
|
||||||
|
@ -403,7 +409,6 @@ namespace gl
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||||
|
|
||||||
|
|
||||||
// Shuffle
|
// Shuffle
|
||||||
bool require_manual_shuffle = false;
|
bool require_manual_shuffle = false;
|
||||||
if (pack_unpack_swap_bytes)
|
if (pack_unpack_swap_bytes)
|
||||||
|
@ -424,7 +429,6 @@ namespace gl
|
||||||
}
|
}
|
||||||
else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
|
else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
|
||||||
{
|
{
|
||||||
|
|
||||||
//AMD driver bug - cannot use pack_swap_bytes
|
//AMD driver bug - cannot use pack_swap_bytes
|
||||||
//Manually byteswap texel data
|
//Manually byteswap texel data
|
||||||
switch (type)
|
switch (type)
|
||||||
|
@ -474,6 +478,12 @@ namespace gl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||||
|
{
|
||||||
|
// Update memory tag
|
||||||
|
static_cast<gl::render_target*>(vram_texture)->sync_tag();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1504,6 +1504,7 @@ void VKGSRender::end()
|
||||||
// Program is not ready, skip drawing this
|
// Program is not ready, skip drawing this
|
||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
execute_nop_draw();
|
execute_nop_draw();
|
||||||
|
m_rtts.on_write();
|
||||||
rsx::thread::end();
|
rsx::thread::end();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -180,6 +180,12 @@ namespace vk
|
||||||
cmd.begin();
|
cmd.begin();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||||
|
{
|
||||||
|
auto as_rtt = static_cast<vk::render_target*>(vram_texture);
|
||||||
|
if (as_rtt->dirty) as_rtt->read_barrier(cmd);
|
||||||
|
}
|
||||||
|
|
||||||
vk::image *target = vram_texture;
|
vk::image *target = vram_texture;
|
||||||
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * vram_texture->width();
|
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * vram_texture->width();
|
||||||
|
|
||||||
|
@ -333,6 +339,12 @@ namespace vk
|
||||||
void finish_flush()
|
void finish_flush()
|
||||||
{
|
{
|
||||||
dma_buffer->unmap();
|
dma_buffer->unmap();
|
||||||
|
|
||||||
|
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||||
|
{
|
||||||
|
// Update memory tag
|
||||||
|
static_cast<vk::render_target*>(vram_texture)->sync_tag();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue