mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-15 03:08:36 +12:00
Optimized cached write-through
- Allows grabbing an unsynchronized memory block when its contents are going to be overwritten anyway
- Allows flushing only a specified range of memory
This commit is contained in:
parent
f8d999b384
commit
d2bf04796f
4 changed files with 72 additions and 17 deletions
|
@ -478,7 +478,7 @@ namespace gl
|
||||||
flushed = true;
|
flushed = true;
|
||||||
|
|
||||||
const auto valid_range = get_confirmed_range();
|
const auto valid_range = get_confirmed_range();
|
||||||
void *dst = get_raw_ptr(valid_range.first);
|
void *dst = get_raw_ptr(valid_range.first, true);
|
||||||
|
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
|
||||||
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
|
void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
|
||||||
|
@ -559,7 +559,7 @@ namespace gl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
flush_io();
|
flush_io(valid_range.first, valid_range.second);
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||||
|
|
||||||
|
|
|
@ -245,7 +245,7 @@ namespace vk
|
||||||
|
|
||||||
const auto valid_range = get_confirmed_range();
|
const auto valid_range = get_confirmed_range();
|
||||||
void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
|
void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
|
||||||
void* pixels_dst = get_raw_ptr(valid_range.first);
|
void* pixels_dst = get_raw_ptr(valid_range.first, true);
|
||||||
|
|
||||||
const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
|
const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
|
||||||
const auto elem_size = texel_layout.first;
|
const auto elem_size = texel_layout.first;
|
||||||
|
@ -323,7 +323,7 @@ namespace vk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
flush_io();
|
flush_io(valid_range.first, valid_range.second);
|
||||||
dma_buffer->unmap();
|
dma_buffer->unmap();
|
||||||
reset_write_statistics();
|
reset_write_statistics();
|
||||||
|
|
||||||
|
|
|
@ -38,13 +38,14 @@ namespace rsx
|
||||||
if (locked_memory_ptr)
|
if (locked_memory_ptr)
|
||||||
{
|
{
|
||||||
const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
|
const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
|
||||||
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
|
u32* first = locked_memory_ptr.get<u32>(confirmed_range.first, true);
|
||||||
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4);
|
u32* last = locked_memory_ptr.get<u32>(valid_limit - 4, true);
|
||||||
|
|
||||||
*first = cpu_address_base + confirmed_range.first;
|
*first = cpu_address_base + confirmed_range.first;
|
||||||
*last = cpu_address_base + valid_limit - 4;
|
*last = cpu_address_base + valid_limit - 4;
|
||||||
|
|
||||||
locked_memory_ptr.flush();
|
locked_memory_ptr.flush(confirmed_range.first, 4);
|
||||||
|
locked_memory_ptr.flush(valid_limit - 4, 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -321,10 +322,10 @@ namespace rsx
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T = void>
|
template <typename T = void>
|
||||||
T* get_raw_ptr(u32 offset = 0)
|
T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
|
||||||
{
|
{
|
||||||
verify(HERE), locked_memory_ptr;
|
verify(HERE), locked_memory_ptr;
|
||||||
return locked_memory_ptr.get<T>(offset);
|
return locked_memory_ptr.get<T>(offset, no_sync);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool test_memory_head()
|
bool test_memory_head()
|
||||||
|
@ -350,9 +351,9 @@ namespace rsx
|
||||||
return (*last == (cpu_address_base + valid_limit - 4));
|
return (*last == (cpu_address_base + valid_limit - 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
void flush_io() const
|
void flush_io(u32 offset = 0, u32 len = 0) const
|
||||||
{
|
{
|
||||||
locked_memory_ptr.flush();
|
locked_memory_ptr.flush(offset, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<u32, u32> get_confirmed_range() const
|
std::pair<u32, u32> get_confirmed_range() const
|
||||||
|
|
|
@ -97,7 +97,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T = void>
|
template <typename T = void>
|
||||||
T* get(u32 offset = 0)
|
T* get(u32 offset = 0, bool no_sync = false)
|
||||||
{
|
{
|
||||||
if (contiguous)
|
if (contiguous)
|
||||||
{
|
{
|
||||||
|
@ -105,7 +105,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!synchronized)
|
if (!synchronized && !no_sync)
|
||||||
sync();
|
sync();
|
||||||
|
|
||||||
return (T*)(io_cache.data() + offset);
|
return (T*)(io_cache.data() + offset);
|
||||||
|
@ -127,16 +127,70 @@ namespace rsx
|
||||||
synchronized = true;
|
synchronized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void flush() const
|
void flush(u32 offset = 0, u32 len = 0) const
|
||||||
{
|
{
|
||||||
if (contiguous)
|
if (contiguous)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
u8* src = (u8*)io_cache.data();
|
u8* src = (u8*)io_cache.data();
|
||||||
for (const auto &block : _blocks)
|
|
||||||
|
if (!offset && (!len || len == io_cache.size()))
|
||||||
{
|
{
|
||||||
memcpy(block.first.get(), src, block.second);
|
for (const auto &block : _blocks)
|
||||||
src += block.second;
|
{
|
||||||
|
memcpy(block.first.get(), src, block.second);
|
||||||
|
src += block.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto remaining_bytes = len? len : io_cache.size() - offset;
|
||||||
|
const auto write_end = remaining_bytes + offset;
|
||||||
|
|
||||||
|
u32 write_offset;
|
||||||
|
u32 write_length;
|
||||||
|
u32 base_offset = 0;
|
||||||
|
|
||||||
|
for (const auto &block : _blocks)
|
||||||
|
{
|
||||||
|
const u32 block_end = base_offset + block.second;
|
||||||
|
|
||||||
|
if (offset >= base_offset && offset < block_end)
|
||||||
|
{
|
||||||
|
// Head
|
||||||
|
write_offset = (offset - base_offset);
|
||||||
|
write_length = std::min<u32>(block.second - write_offset, remaining_bytes);
|
||||||
|
}
|
||||||
|
else if (base_offset > offset && block_end <= write_end)
|
||||||
|
{
|
||||||
|
// Completely spanned
|
||||||
|
write_offset = 0;
|
||||||
|
write_length = block.second;
|
||||||
|
}
|
||||||
|
else if (base_offset > offset && write_end < block_end)
|
||||||
|
{
|
||||||
|
// Tail
|
||||||
|
write_offset = 0;
|
||||||
|
write_length = remaining_bytes;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// No overlap; skip
|
||||||
|
write_length = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (write_length)
|
||||||
|
{
|
||||||
|
memcpy(block.first.get() + write_offset, src + (base_offset + write_offset), write_length);
|
||||||
|
|
||||||
|
verify(HERE), write_length <= remaining_bytes;
|
||||||
|
remaining_bytes -= write_length;
|
||||||
|
if (!remaining_bytes)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
base_offset += block.second;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue