Optimized cached write-through

- Allows grabbing an unsynchronized memory block if the contents are being overwritten anyway
- Allows flushing only a specified range of memory
Author: kd-11, 2018-05-21 10:58:49 +03:00 (committed by kd-11)
Parent: f8d999b384
Commit: d2bf04796f

4 changed files with 72 additions and 17 deletions
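
Taken together, the two changes let the backend flush paths skip a redundant read-back and write back only the bytes that matter. A minimal usage sketch of the pattern, mirroring the GL/VK hunks below (`section` and `mapped_src` are illustrative stand-ins, not names from this diff):

    // Hedged sketch of the caller pattern this commit enables.
    const auto valid_range = section.get_confirmed_range();

    // no_sync = true: skip synchronizing the shadow cache first, since
    // its contents are about to be overwritten from GPU memory anyway.
    void* dst = section.get_raw_ptr(valid_range.first, true);

    std::memcpy(dst, mapped_src, valid_range.second);

    // Write back only the confirmed range instead of every tracked block.
    section.flush_io(valid_range.first, valid_range.second);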

[File 1 of 4]

@@ -478,7 +478,7 @@ namespace gl
 flushed = true;

 const auto valid_range = get_confirmed_range();
-void *dst = get_raw_ptr(valid_range.first);
+void *dst = get_raw_ptr(valid_range.first, true);

 glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
 void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);

@@ -559,7 +559,7 @@
     }
 }

-flush_io();
+flush_io(valid_range.first, valid_range.second);

 glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
 glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);

[File 2 of 4]

@@ -245,7 +245,7 @@ namespace vk
 const auto valid_range = get_confirmed_range();
 void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
-void* pixels_dst = get_raw_ptr(valid_range.first);
+void* pixels_dst = get_raw_ptr(valid_range.first, true);

 const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
 const auto elem_size = texel_layout.first;

@@ -323,7 +323,7 @@
     }
 }

-flush_io();
+flush_io(valid_range.first, valid_range.second);
 dma_buffer->unmap();

 reset_write_statistics();

[File 3 of 4]

@@ -38,13 +38,14 @@ namespace rsx
 if (locked_memory_ptr)
 {
     const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
-    u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
-    u32* last = locked_memory_ptr.get<u32>(valid_limit - 4);
+    u32* first = locked_memory_ptr.get<u32>(confirmed_range.first, true);
+    u32* last = locked_memory_ptr.get<u32>(valid_limit - 4, true);

     *first = cpu_address_base + confirmed_range.first;
     *last = cpu_address_base + valid_limit - 4;

-    locked_memory_ptr.flush();
+    locked_memory_ptr.flush(confirmed_range.first, 4);
+    locked_memory_ptr.flush(valid_limit - 4, 4);
 }
 }

@@ -321,10 +322,10 @@
 }

 template <typename T = void>
-T* get_raw_ptr(u32 offset = 0)
+T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
 {
     verify(HERE), locked_memory_ptr;
-    return locked_memory_ptr.get<T>(offset);
+    return locked_memory_ptr.get<T>(offset, no_sync);
 }

 bool test_memory_head()

@@ -350,9 +351,9 @@
 return (*last == (cpu_address_base + valid_limit - 4));
 }

-void flush_io() const
+void flush_io(u32 offset = 0, u32 len = 0) const
 {
-    locked_memory_ptr.flush();
+    locked_memory_ptr.flush(offset, len);
 }

 std::pair<u32, u32> get_confirmed_range() const
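
For context on the @@ -38,13 +38,14 @@ hunk above: the section writes 4-byte guard tags at the head and tail of its confirmed range (checked later by test_memory_head() and the tail comparison), so only 8 bytes ever need to reach guest memory. A before/after cost sketch, using only names from this diff:

    // Before: any tag update wrote back the entire io_cache.
    locked_memory_ptr.flush();

    // After: only the two 4-byte guard tags are written back; a CPU write
    // over either end of the section will later fail the tag comparison.
    locked_memory_ptr.flush(confirmed_range.first, 4); // head tag
    locked_memory_ptr.flush(valid_limit - 4, 4);       // tail tag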

[File 4 of 4]

@@ -97,7 +97,7 @@ namespace rsx
 }

 template <typename T = void>
-T* get(u32 offset = 0)
+T* get(u32 offset = 0, bool no_sync = false)
 {
     if (contiguous)
     {

@@ -105,7 +105,7 @@
 }
 else
 {
-    if (!synchronized)
+    if (!synchronized && !no_sync)
         sync();

     return (T*)(io_cache.data() + offset);
@@ -127,16 +127,70 @@
     synchronized = true;
 }

-void flush() const
+void flush(u32 offset = 0, u32 len = 0) const
 {
     if (contiguous)
         return;

     u8* src = (u8*)io_cache.data();
-    for (const auto &block : _blocks)
+
+    if (!offset && (!len || len == io_cache.size()))
     {
-        memcpy(block.first.get(), src, block.second);
-        src += block.second;
+        for (const auto &block : _blocks)
+        {
+            memcpy(block.first.get(), src, block.second);
+            src += block.second;
+        }
+    }
+    else
+    {
+        auto remaining_bytes = len ? len : io_cache.size() - offset;
+        const auto write_end = remaining_bytes + offset;
+
+        u32 write_offset;
+        u32 write_length;
+        u32 base_offset = 0;
+
+        for (const auto &block : _blocks)
+        {
+            const u32 block_end = base_offset + block.second;
+
+            if (offset >= base_offset && offset < block_end)
+            {
+                // Head
+                write_offset = (offset - base_offset);
+                write_length = std::min<u32>(block.second - write_offset, remaining_bytes);
+            }
+            else if (base_offset > offset && block_end <= write_end)
+            {
+                // Completely spanned
+                write_offset = 0;
+                write_length = block.second;
+            }
+            else if (base_offset > offset && write_end < block_end)
+            {
+                // Tail
+                write_offset = 0;
+                write_length = remaining_bytes;
+            }
+            else
+            {
+                // No overlap; skip
+                write_length = 0;
+            }
+
+            if (write_length)
+            {
+                memcpy(block.first.get() + write_offset, src + (base_offset + write_offset), write_length);
+
+                verify(HERE), write_length <= remaining_bytes;
+                remaining_bytes -= write_length;
+                if (!remaining_bytes)
+                    break;
+            }
+
+            base_offset += block.second;
+        }
     }
 }
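
The new ranged flush(offset, len) is the heart of the commit: it maps a linear span of io_cache onto a chain of non-contiguous blocks, classifying each block as head, completely spanned, tail, or non-overlapping. Below is a self-contained sketch of the same classification logic; it swaps the weak_ptr internals for a plain std::vector of blocks and verify(HERE) for assert, so it is illustrative rather than the shipped code.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    using u8 = std::uint8_t;
    using u32 = std::uint32_t;

    // Stand-in for the weak_ptr block list: each block owns its destination
    // bytes; in the real code a block is a (pointer, length) pair.
    struct mem_block
    {
        std::vector<u8> storage;
        u32 size;
    };

    // Copy [offset, offset + len) of the linear shadow cache onto the block
    // chain, mirroring the head / spanned / tail cases in the hunk above.
    void flush_range(const std::vector<u8>& io_cache, std::vector<mem_block>& blocks, u32 offset, u32 len)
    {
        const u8* src = io_cache.data();
        u32 remaining_bytes = len ? len : (u32)io_cache.size() - offset;
        const u32 write_end = remaining_bytes + offset;

        u32 base_offset = 0;
        for (auto& block : blocks)
        {
            const u32 block_end = base_offset + block.size;
            u32 write_offset = 0;
            u32 write_length = 0;

            if (offset >= base_offset && offset < block_end)
            {
                // Head: the span starts inside this block
                write_offset = offset - base_offset;
                write_length = std::min<u32>(block.size - write_offset, remaining_bytes);
            }
            else if (base_offset > offset && block_end <= write_end)
            {
                // Completely spanned
                write_length = block.size;
            }
            else if (base_offset > offset && write_end < block_end)
            {
                // Tail: the span ends inside this block
                write_length = remaining_bytes;
            }
            // else: no overlap; skip

            if (write_length)
            {
                std::memcpy(block.storage.data() + write_offset, src + base_offset + write_offset, write_length);
                assert(write_length <= remaining_bytes);
                remaining_bytes -= write_length;
                if (!remaining_bytes)
                    break;
            }

            base_offset += block.size;
        }
    }

    int main()
    {
        // Three 4 KiB blocks shadowed by one 12 KiB linear cache.
        std::vector<mem_block> blocks(3);
        for (auto& b : blocks)
        {
            b.size = 4096;
            b.storage.assign(4096, 0x00);
        }
        std::vector<u8> cache(12288, 0xAA);

        // Flush 8 KiB starting 2 KiB in: block 0 is the head, block 1 is
        // completely spanned, block 2 takes the 2 KiB tail.
        flush_range(cache, blocks, 2048, 8192);

        assert(blocks[0].storage[2047] == 0x00 && blocks[0].storage[2048] == 0xAA); // head boundary
        assert(blocks[1].storage[0] == 0xAA && blocks[1].storage[4095] == 0xAA);    // fully spanned
        assert(blocks[2].storage[2047] == 0xAA && blocks[2].storage[2048] == 0x00); // tail boundary
        return 0;
    }

Only the head block pays a nonzero write_offset, and the walk breaks as soon as remaining_bytes reaches zero, so a small flush (like the 4-byte tag writes above) touches a single block instead of the whole chain.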