vulkan: Texture cache rewritten - Use a map of vectors instead of a flat array

This commit is contained in:
kd-11 2017-07-24 20:50:32 +03:00
parent 46fa6e47fe
commit dd19622823

View file

@ -5,6 +5,7 @@
#include "Emu/System.h" #include "Emu/System.h"
#include "../Common/TextureUtils.h" #include "../Common/TextureUtils.h"
#include "../rsx_utils.h" #include "../rsx_utils.h"
#include "Utilities/mutex.h"
namespace vk namespace vk
{ {
@ -292,9 +293,34 @@ namespace vk
class texture_cache class texture_cache
{ {
struct ranged_storage
{
std::vector<cached_texture_section> data; //Stored data
std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks
u32 max_range = 0; //Largest stored block
void notify(u32 data_size)
{
max_range = std::max(data_size, max_range);
valid_count++;
}
void add(cached_texture_section& section, u32 data_size)
{
max_range = std::max(data_size, max_range);
valid_count++;
data.push_back(std::move(section));
}
};
private: private:
std::vector<cached_texture_section> m_cache; shared_mutex m_cache_mutex;
std::pair<u32, u32> texture_cache_range = std::make_pair(0xFFFFFFFF, 0); std::unordered_map<u32, ranged_storage> m_cache;
std::pair<u32, u32> read_only_range = std::make_pair(0xFFFFFFFF, 0);
std::pair<u32, u32> no_access_range = std::make_pair(0xFFFFFFFF, 0);
std::vector<std::unique_ptr<vk::image_view> > m_temporary_image_view; std::vector<std::unique_ptr<vk::image_view> > m_temporary_image_view;
std::vector<std::unique_ptr<vk::image>> m_dirty_textures; std::vector<std::unique_ptr<vk::image>> m_dirty_textures;
@ -310,7 +336,15 @@ namespace vk
cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{ {
for (auto &tex : m_cache) {
reader_lock lock(m_cache_mutex);
auto found = m_cache.find(rsx_address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{ {
if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty()) if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
{ {
@ -326,7 +360,7 @@ namespace vk
} }
} }
for (auto &tex : m_cache) for (auto &tex : range_data.data)
{ {
if (tex.is_dirty()) if (tex.is_dirty())
{ {
@ -337,18 +371,29 @@ namespace vk
} }
tex.release_dma_resources(); tex.release_dma_resources();
range_data.notify(rsx_size);
return tex; return tex;
} }
} }
}
}
m_cache.push_back(cached_texture_section()); writer_lock lock(m_cache_mutex);
return m_cache[m_cache.size() - 1]; cached_texture_section tmp;
m_cache[rsx_address].add(tmp, rsx_size);
return m_cache[rsx_address].data.back();
} }
cached_texture_section* find_flushable_section(const u32 address, const u32 range) cached_texture_section* find_flushable_section(const u32 address, const u32 range)
{ {
for (auto &tex : m_cache) reader_lock lock(m_cache_mutex);
auto found = m_cache.find(address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{ {
if (tex.is_dirty()) continue; if (tex.is_dirty()) continue;
if (!tex.is_flushable() && !tex.is_flushed()) continue; if (!tex.is_flushable() && !tex.is_flushed()) continue;
@ -356,13 +401,17 @@ namespace vk
if (tex.matches(address, range)) if (tex.matches(address, range))
return &tex; return &tex;
} }
}
return nullptr; return nullptr;
} }
void purge_cache() void purge_cache()
{ {
for (auto &tex : m_cache) for (auto &address_range : m_cache)
{
auto &range_data = address_range.second;
for (auto &tex : range_data.data)
{ {
if (tex.exists()) if (tex.exists())
{ {
@ -376,10 +425,11 @@ namespace vk
tex.release_dma_resources(); tex.release_dma_resources();
} }
range_data.data.resize(0);
}
m_temporary_image_view.clear(); m_temporary_image_view.clear();
m_dirty_textures.clear(); m_dirty_textures.clear();
m_cache.resize(0);
} }
//Helpers //Helpers
@ -611,13 +661,14 @@ namespace vk
change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
vk::leave_uninterruptible(); vk::leave_uninterruptible();
writer_lock lock(m_cache_mutex);
region.reset(texaddr, range); region.reset(texaddr, range);
region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image); region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image);
region.protect(utils::protection::ro); region.protect(utils::protection::ro);
region.set_dirty(false); region.set_dirty(false);
texture_cache_range = region.get_min_max(texture_cache_range); read_only_range = region.get_min_max(read_only_range);
return view; return view;
} }
@ -625,11 +676,13 @@ namespace vk
{ {
cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1); cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
writer_lock lock(m_cache_mutex);
if (!region.is_locked()) if (!region.is_locked())
{ {
region.reset(memory_address, memory_size); region.reset(memory_address, memory_size);
region.set_dirty(false); region.set_dirty(false);
texture_cache_range = region.get_min_max(texture_cache_range); no_access_range = region.get_min_max(no_access_range);
} }
region.protect(utils::protection::no); region.protect(utils::protection::no);
@ -656,11 +709,17 @@ namespace vk
std::tuple<bool, bool> address_is_flushable(u32 address) std::tuple<bool, bool> address_is_flushable(u32 address)
{ {
if (address < texture_cache_range.first || if (address < no_access_range.first ||
address > texture_cache_range.second) address > no_access_range.second)
return std::make_tuple(false, false); return std::make_tuple(false, false);
for (auto &tex : m_cache) reader_lock lock(m_cache_mutex);
auto found = m_cache.find(address);
if (found != m_cache.end())
{
auto &range_data = found->second;
for (auto &tex : range_data.data)
{ {
if (tex.is_dirty()) continue; if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue; if (!tex.is_flushable()) continue;
@ -668,22 +727,70 @@ namespace vk
if (tex.overlaps(address)) if (tex.overlaps(address))
return std::make_tuple(true, tex.is_synchronized()); return std::make_tuple(true, tex.is_synchronized());
} }
}
for (auto &address_range : m_cache)
{
if (address_range.first == address)
continue;
auto &range_data = address_range.second;
//Quickly discard range
const u32 lock_base = address_range.first & ~0xfff;
const u32 lock_limit = align(range_data.max_range + address_range.first, 4096);
if (address < lock_base || address >= lock_limit)
continue;
for (auto &tex : range_data.data)
{
if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue;
if (tex.overlaps(address))
return std::make_tuple(true, tex.is_synchronized());
}
}
return std::make_tuple(false, false); return std::make_tuple(false, false);
} }
bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue) bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
{ {
if (address < texture_cache_range.first || if (address < no_access_range.first ||
address > texture_cache_range.second) address > no_access_range.second)
return false; return false;
bool response = false; bool response = false;
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0); std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
std::unordered_map<u32, bool> processed_ranges;
for (int i = 0; i < m_cache.size(); ++i) reader_lock lock(m_cache_mutex);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{ {
auto &tex = m_cache[i]; auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (processed_ranges[base] || range_data.valid_count == 0)
continue;
//Quickly discard range
const u32 lock_base = base & ~0xfff;
const u32 lock_limit = align(range_data.max_range + base, 4096);
if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
(lock_base > address || lock_limit <= address))
{
processed_ranges[base] = true;
continue;
}
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue; if (tex.is_dirty()) continue;
if (!tex.is_flushable()) continue; if (!tex.is_flushable()) continue;
@ -696,8 +803,9 @@ namespace vk
if (new_range.first != trampled_range.first || if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second) new_range.second != trampled_range.second)
{ {
trampled_range = new_range;
i = 0; i = 0;
trampled_range = new_range;
range_reset = true;
} }
//TODO: Map basic host_visible memory without coherent constraint //TODO: Map basic host_visible memory without coherent constraint
@ -712,21 +820,58 @@ namespace vk
} }
} }
if (range_reset)
{
processed_ranges.clear();
It = m_cache.begin();
}
processed_ranges[base] = true;
}
return response; return response;
} }
bool invalidate_address(u32 address) bool invalidate_address(u32 address)
{ {
if (address < texture_cache_range.first || if (address < read_only_range.first ||
address > texture_cache_range.second) address > read_only_range.second)
{
//Doesn't fall in the read-only textures range; check render targets
if (address < no_access_range.first ||
address > no_access_range.second)
return false; return false;
}
bool response = false; bool response = false;
std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0); std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
std::unordered_map<u32, bool> processed_ranges;
for (int i = 0; i < m_cache.size(); ++i) reader_lock lock(m_cache_mutex);
for (auto It = m_cache.begin(); It != m_cache.end(); It++)
{ {
auto &tex = m_cache[i]; auto &range_data = It->second;
const u32 base = It->first;
bool range_reset = false;
if (processed_ranges[base] || range_data.valid_count == 0)
continue;
//Quickly discard range
const u32 lock_base = base & ~0xfff;
const u32 lock_limit = align(range_data.max_range + base, 4096);
if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
(lock_base > address || lock_limit <= address))
{
processed_ranges[base] = true;
continue;
}
for (int i = 0; i < range_data.data.size(); i++)
{
auto &tex = range_data.data[i];
if (tex.is_dirty()) continue; if (tex.is_dirty()) continue;
if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
@ -739,17 +884,31 @@ namespace vk
if (new_range.first != trampled_range.first || if (new_range.first != trampled_range.first ||
new_range.second != trampled_range.second) new_range.second != trampled_range.second)
{ {
trampled_range = new_range;
i = 0; i = 0;
trampled_range = new_range;
range_reset = true;
} }
// Upgrade to writer lock
lock.upgrade();
tex.set_dirty(true); tex.set_dirty(true);
tex.unprotect(); tex.unprotect();
range_data.valid_count--;
response = true; response = true;
} }
} }
if (range_reset)
{
processed_ranges.clear();
It = m_cache.begin();
}
processed_ranges[base] = true;
}
return response; return response;
} }