rsx: Drop format information from the vertex cache and use a simpler model that is much faster

This commit is contained in:
kd-11 2023-06-09 00:09:39 +03:00 committed by kd-11
parent 2df7e46cb1
commit 4d565eade6
7 changed files with 35 additions and 36 deletions

View file

@ -20,8 +20,8 @@
namespace gl namespace gl
{ {
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<GLenum>, GLenum>; using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<GLenum>; using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache;
using null_vertex_cache = vertex_cache; using null_vertex_cache = vertex_cache;
using shader_cache = rsx::shaders_cache<void*, GLProgramBuffer>; using shader_cache = rsx::shaders_cache<void*, GLProgramBuffer>;

View file

@ -195,7 +195,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride); const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset; storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first)) if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first))
{ {
ensure(cached->local_address == storage_address); ensure(cached->local_address == storage_address);
@ -216,7 +216,7 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
if (to_store) if (to_store)
{ {
//store ref in vertex cache //store ref in vertex cache
m_vertex_cache->store_range(storage_address, GL_R8UI, required.first, persistent_mapping.second); m_vertex_cache->store_range(storage_address, required.first, persistent_mapping.second);
} }
} }

View file

@ -31,8 +31,8 @@ namespace vk
struct program_cache; struct program_cache;
struct pipeline_props; struct pipeline_props;
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>; using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range>;
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>; using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache;
using null_vertex_cache = vertex_cache; using null_vertex_cache = vertex_cache;
using shader_cache = rsx::shaders_cache<vk::pipeline_props, vk::program_cache>; using shader_cache = rsx::shaders_cache<vk::pipeline_props, vk::program_cache>;

View file

@ -251,7 +251,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride); const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset; storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first)) if (auto cached = m_vertex_cache->find_vertex_range(storage_address, required.first))
{ {
ensure(cached->local_address == storage_address); ensure(cached->local_address == storage_address);
@ -274,7 +274,7 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
if (to_store) if (to_store)
{ {
//store ref in vertex cache //store ref in vertex cache
m_vertex_cache->store_range(storage_address, VK_FORMAT_R8_UINT, required.first, static_cast<u32>(persistent_offset)); m_vertex_cache->store_range(storage_address, required.first, static_cast<u32>(persistent_offset));
} }
} }
} }

View file

@ -3,6 +3,7 @@
#include "Utilities/lockless.h" #include "Utilities/lockless.h"
#include "Utilities/Thread.h" #include "Utilities/Thread.h"
#include "Common/bitfield.hpp" #include "Common/bitfield.hpp"
#include "Common/unordered_map.hpp"
#include "Emu/System.h" #include "Emu/System.h"
#include "Emu/cache_utils.hpp" #include "Emu/cache_utils.hpp"
#include "Program/ProgramStateCache.h" #include "Program/ProgramStateCache.h"
@ -10,7 +11,6 @@
#include "Overlays/Shaders/shader_loading_dialog.h" #include "Overlays/Shaders/shader_loading_dialog.h"
#include <chrono> #include <chrono>
#include <unordered_map>
#include "util/sysinfo.hpp" #include "util/sysinfo.hpp"
#include "util/fnv_hash.hpp" #include "util/fnv_hash.hpp"
@ -447,67 +447,62 @@ namespace rsx
namespace vertex_cache namespace vertex_cache
{ {
// A null vertex cache // A null vertex cache
template <typename storage_type, typename upload_format> template <typename storage_type>
class default_vertex_cache class default_vertex_cache
{ {
public: public:
virtual ~default_vertex_cache() = default; virtual ~default_vertex_cache() = default;
virtual storage_type* find_vertex_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; } virtual const storage_type* find_vertex_range(u32 /*local_addr*/, u32 /*data_length*/) { return nullptr; }
virtual void store_range(uptr /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {} virtual void store_range(u32 /*local_addr*/, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
virtual void purge() {} virtual void purge() {}
}; };
// A weak vertex cache with no data checks or memory range locks
// Of limited use since contents are only guaranteed to be valid once per frame
// TODO: Strict vertex cache with range locks
template <typename upload_format>
struct uploaded_range struct uploaded_range
{ {
uptr local_address; uptr local_address;
upload_format buffer_format;
u32 offset_in_heap; u32 offset_in_heap;
u32 data_length; u32 data_length;
}; };
template <typename upload_format> // A weak vertex cache with no data checks or memory range locks
class weak_vertex_cache : public default_vertex_cache<uploaded_range<upload_format>, upload_format> // Of limited use since contents are only guaranteed to be valid once per frame
// Supports up to 1GiB block lengths if typed and full 4GiB otherwise.
// Using a 1:1 hash-value with robin-hood is 2x faster than what we had before with std-map-of-arrays.
class weak_vertex_cache : public default_vertex_cache<uploaded_range>
{ {
using storage_type = uploaded_range<upload_format>; using storage_type = uploaded_range;
private: private:
std::unordered_map<uptr, std::vector<storage_type>> vertex_ranges; rsx::unordered_map<uptr, storage_type> vertex_ranges;
FORCE_INLINE u64 hash(u32 local_addr, u32 data_length) const
{
return u64(local_addr) | (u64(data_length) << 32);
}
public: public:
storage_type* find_vertex_range(uptr local_addr, upload_format fmt, u32 data_length) override const storage_type* find_vertex_range(u32 local_addr, u32 data_length) override
{ {
auto found = vertex_ranges.find(local_addr); const auto key = hash(local_addr, data_length);
const auto found = vertex_ranges.find(key);
if (found == vertex_ranges.end()) if (found == vertex_ranges.end())
{ {
return nullptr; return nullptr;
} }
for (auto &v : found->second) return std::addressof(found->second);
{
// NOTE: This has to match exactly. Using sized shortcuts such as >= comparison causes artifacting in some applications (UC1)
if (v.data_length == data_length && v.buffer_format == fmt)
{
return &v;
}
} }
return nullptr; void store_range(u32 local_addr, u32 data_length, u32 offset_in_heap) override
}
void store_range(uptr local_addr, upload_format fmt, u32 data_length, u32 offset_in_heap) override
{ {
storage_type v = {}; storage_type v = {};
v.buffer_format = fmt;
v.data_length = data_length; v.data_length = data_length;
v.local_address = local_addr; v.local_address = local_addr;
v.offset_in_heap = offset_in_heap; v.offset_in_heap = offset_in_heap;
vertex_ranges[local_addr].push_back(v); const auto key = hash(local_addr, data_length);
vertex_ranges[key] = v;
} }
void purge() override void purge() override

View file

@ -556,6 +556,7 @@
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" /> <ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" /> <ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
<ClInclude Include="Emu\RSX\Common\time.hpp" /> <ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" /> <ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" /> <ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" /> <ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />

View file

@ -2365,6 +2365,9 @@
<ClInclude Include="Emu\Io\emulated_pad_config.h"> <ClInclude Include="Emu\Io\emulated_pad_config.h">
<Filter>Emu\Io</Filter> <Filter>Emu\Io</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="Emu\RSX\Common\unordered_map.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl"> <None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">