mirror of https://github.com/RPCS3/rpcs3.git (synced 2025-07-14 18:58:36 +12:00)

rsx: Restructuring [WIP]

- Refactor invalidate memory functions into one function
- Add cached object rebuilding functionality to avoid throwing away useful memory on an invalidate
- Add debug monitoring of texture unit VRAM usage

parent b0737d1c90 · commit 5e58cf6079
8 changed files with 263 additions and 247 deletions
@@ -46,6 +46,8 @@ struct data_heap
 	size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget
 	size_t m_current_allocated_size;
 	size_t m_largest_allocated_pool;

+	char* m_name;
+
public:
 	data_heap() = default;
 	~data_heap() = default;
@@ -54,8 +56,10 @@ public:
 	size_t m_get_pos; // End of free space

-	void init(size_t heap_size, size_t min_guard_size=0x10000)
+	void init(size_t heap_size, const char* buffer_name = "unnamed", size_t min_guard_size=0x10000)
 	{
+		m_name = const_cast<char*>(buffer_name);
+
 		m_size = heap_size;
 		m_put_pos = 0;
 		m_get_pos = heap_size - 1;
@@ -71,8 +75,8 @@ public:
 	{
 		if (!can_alloc<Alignement>(size))
 		{
-			fmt::throw_exception("Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE,
-				m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool);
+			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE,
+				m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool);
 		}

 		size_t alloc_size = align(size, Alignement);
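With the named overload, a heap overflow now identifies the offending buffer in the exception text. A minimal usage sketch (the heap instance and sizes here are illustrative, not part of this commit):

	data_heap ring;
	// Any later overflow throws "[attrib buffer] Working buffer not big enough, ..."
	ring.init(64 * 0x100000, "attrib buffer", /* min_guard_size */ 0x400000);

Note that init() only stores the pointer (hence the const_cast), so the name should be a string literal or otherwise outlive the heap.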
@@ -31,6 +31,8 @@ namespace rsx
 		u16 real_pitch;
 		u16 rsx_pitch;

+		u64 cache_tag;
+
 		rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
 		rsx::texture_upload_context context = rsx::texture_upload_context::shader_read;

@@ -162,7 +164,8 @@ namespace rsx

 		//Memory usage
 		const s32 m_max_zombie_objects = 128; //Limit on how many texture objects to keep around for reuse after they are invalidated
-		s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory
+		std::atomic<s32> m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory
+		std::atomic<u32> m_texture_memory_in_use = { 0 };

 		/* Helpers */
 		virtual void free_texture_section(section_storage_type&) = 0;
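The counters become atomics because they are now touched from more than one thread: invalidation can run on whichever thread raised the access violation, while the render thread reads the totals for the debug overlay and frees zombie objects. A minimal sketch of the accounting pattern (the helper names are hypothetical; the real updates are inlined at the call sites later in this diff):

	std::atomic<s32> m_unreleased_texture_objects{ 0 };
	std::atomic<u32> m_texture_memory_in_use{ 0 };

	void account_invalidated()               // hypothetical helper
	{
		m_unreleased_texture_objects++;      // atomic RMW, safe from any thread
	}

	void account_freed(u32 section_size)     // hypothetical helper
	{
		m_unreleased_texture_objects--;
		m_texture_memory_in_use -= section_size; // mirrors free_texture_section(tex) + tex.get_section_size()
	}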
@@ -179,10 +182,14 @@ namespace rsx
 		inline u32 get_block_address(u32 address) const { return (address & ~0xFFFFFF); }

 	private:
-		//Internal implementation methods
-		bool invalidate_range_impl(u32 address, u32 range, bool unprotect)
+		//Internal implementation methods and helpers
+
+		//Get intersecting set - Returns all objects intersecting a given range and their owning blocks
+		std::vector<std::pair<section_storage_type*, ranged_storage*>> get_intersecting_set(u32 address, u32 range, bool check_whole_size)
 		{
+			std::vector<std::pair<section_storage_type*, ranged_storage*>> result;
 			bool response = false;
+			u64 cache_tag = get_system_time();
 			u32 last_dirty_block = UINT32_MAX;
 			std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);

@@ -195,7 +202,7 @@ namespace rsx
 				if (base == last_dirty_block && range_data.valid_count == 0)
 					continue;

-				if (trampled_range.first < trampled_range.second)
+				if (trampled_range.first <= trampled_range.second)
 				{
 					//Only if a valid range, ignore empty sets
 					if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second)
@@ -205,11 +212,10 @@ namespace rsx
 				for (int i = 0; i < range_data.data.size(); i++)
 				{
 					auto &tex = range_data.data[i];
-					if (tex.is_dirty()) continue;
+					if (tex.cache_tag == cache_tag) continue; //already processed
 					if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better

-					auto overlapped = tex.overlaps_page(trampled_range, address, false);
+					auto overlapped = tex.overlaps_page(trampled_range, address, check_whole_size);
 					if (std::get<0>(overlapped))
 					{
 						auto &new_range = std::get<1>(overlapped);
@@ -222,19 +228,8 @@ namespace rsx
 							range_reset = true;
 						}

-						if (unprotect)
-						{
-							tex.set_dirty(true);
-							tex.unprotect();
-						}
-						else
-						{
-							tex.discard();
-						}
-
-						m_unreleased_texture_objects++;
-						range_data.remove_one();
-						response = true;
+						tex.cache_tag = cache_tag;
+						result.push_back({&tex, &range_data});
 					}
 				}
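The cache_tag stamp is what keeps the collector correct when it has to rescan: if an overlap widens trampled_range, the loop restarts from i = 0, and already-collected sections are skipped by the tag test instead of being pushed into the result twice. The idiom in isolation (containers simplified, and overlaps() standing in for the overlaps_page() call; this is a sketch of the logic above, not the literal code):

	const u64 pass_tag = get_system_time(); // unique value per invalidation pass
	for (auto &tex : sections)
	{
		if (tex.cache_tag == pass_tag) continue; // already collected during this pass

		if (overlaps(tex)) // may also widen the scanned range and restart the loop
		{
			tex.cache_tag = pass_tag; // stamp before collecting
			result.push_back({ &tex, &range_data });
		}
	}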
@@ -245,77 +240,70 @@ namespace rsx
 				}
 			}

-			return response;
+			return result;
 		}

+		//Invalidate range base implementation
+		//Returns a pair:
+		//1. A boolean - true if the memory range was truly locked and has been dealt with, false otherwise
+		//2. A vector of all sections that should be flushed if the caller did not set the allow_flush method. That way the caller can make preparations on how to deal with sections that require flushing
+		// Note that the sections will be unlocked regardless of the allow_flush flag
 		template <typename ...Args>
-		bool flush_address_impl(u32 address, Args&&... extras)
+		std::pair<bool, std::vector<section_storage_type*>> invalidate_range_impl_base(u32 address, u32 range, bool discard_only, bool rebuild_cache, bool allow_flush, Args&... extras)
 		{
-			bool response = false;
-			u32 last_dirty_block = UINT32_MAX;
-			std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
-
-			std::vector<section_storage_type*> sections_to_flush;
-			for (auto It = m_cache.begin(); It != m_cache.end(); It++)
-			{
-				auto &range_data = It->second;
-				const u32 base = It->first;
-				bool range_reset = false;
-
-				if (base == last_dirty_block && range_data.valid_count == 0)
-					continue;
-
-				if (trampled_range.first < trampled_range.second)
-				{
-					//Only if a valid range, ignore empty sets
-					if (trampled_range.first >= (range_data.max_addr + range_data.max_range) || range_data.min_addr >= trampled_range.second)
-						continue;
-				}
-
-				for (int i = 0; i < range_data.data.size(); i++)
-				{
-					auto &tex = range_data.data[i];
-
-					if (tex.is_dirty()) continue;
-					if (!tex.is_locked()) continue;
-
-					auto overlapped = tex.overlaps_page(trampled_range, address, true);
-					if (std::get<0>(overlapped))
-					{
-						auto &new_range = std::get<1>(overlapped);
-
-						if (new_range.first != trampled_range.first ||
-							new_range.second != trampled_range.second)
-						{
-							i = 0;
-							trampled_range = new_range;
-							range_reset = true;
-						}
-
-						if (tex.is_flushable())
-						{
-							sections_to_flush.push_back(&tex);
-						}
-						else
-						{
-							m_unreleased_texture_objects++;
-							tex.set_dirty(true);
-						}
-
-						tex.unprotect();
-						range_data.remove_one();
-
-						response = true;
-					}
-				}
-
-				if (range_reset)
-				{
-					It = m_cache.begin();
-				}
-			}
-
-			for (auto tex : sections_to_flush)
+			auto trampled_set = get_intersecting_set(address, range, allow_flush);
+
+			if (trampled_set.size() > 0)
+			{
+				// Rebuild the cache by only destroying ranges that need to be destroyed to unlock this page
+				const auto to_reprotect = std::remove_if(trampled_set.begin(), trampled_set.end(),
+					[&](const std::pair<section_storage_type*, ranged_storage*>& obj)
+					{
+						if (!rebuild_cache && !obj.first->is_flushable())
+							return false;
+
+						const std::pair<u32, u32> null_check = std::make_pair(UINT32_MAX, 0);
+						return !std::get<0>(obj.first->overlaps_page(null_check, address, true));
+					});
+
+				std::vector<section_storage_type*> sections_to_flush;
+				for (auto It = trampled_set.begin(); It != to_reprotect; ++It)
+				{
+					auto obj = *It;
+
+					if (discard_only)
+						obj.first->discard();
+					else
+						obj.first->unprotect();
+
+					if (obj.first->is_flushable() && allow_flush)
+					{
+						sections_to_flush.push_back(obj.first);
+					}
+					else
+					{
+						obj.first->set_dirty(true);
+						m_unreleased_texture_objects++;
+					}
+
+					obj.second->remove_one();
+				}
+
+				for (auto It = to_reprotect; It != trampled_set.end(); It++)
+				{
+					auto obj = *It;
+
+					auto old_prot = obj.first->get_protection();
+					obj.first->discard();
+					obj.first->protect(old_prot);
+					obj.first->set_dirty(false);
+				}
+
+				trampled_set.erase(to_reprotect, trampled_set.end());
+
+				if (allow_flush)
+				{
+					for (const auto &tex : sections_to_flush)
 			{
 				if (!tex->flush(std::forward<Args>(extras)...))
 				{
@@ -325,7 +313,19 @@ namespace rsx
 					}
 				}

-			return response;
+				return{ true, {} };
+			}
+
+			return std::make_pair(true, sections_to_flush);
+		}
+
+		return{ false, {} };
+		}
+
+		template <typename ...Args>
+		std::pair<bool, std::vector<section_storage_type*>> invalidate_range_impl(u32 address, u32 range, bool discard, bool allow_flush, Args&... extras)
+		{
+			return invalidate_range_impl_base(address, range, discard, true, allow_flush, std::forward<Args>(extras)...);
 		}

 		bool is_hw_blit_engine_compatible(const u32 format) const
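The reprotect pass above is the "rebuilding" advertised in the commit message: sections that intersect the trampled page only incidentally are not destroyed. Their stale contents are dropped, but the page protection and clean status are restored so the object can be refilled later instead of recreated. The save/restore idiom in isolation (using the get_protection() accessor added at the bottom of this diff):

	auto old_prot = obj.first->get_protection();
	obj.first->discard();         // drop the stale backing contents
	obj.first->protect(old_prot); // re-arm the same access watch
	obj.first->set_dirty(false);  // section remains valid and reusable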
@@ -427,6 +427,7 @@ namespace rsx
 			{
 				m_unreleased_texture_objects--;
 				free_texture_section(tex);
+				m_texture_memory_in_use -= tex.get_section_size();
 			}

 			range_data.notify(rsx_address, rsx_size);
@@ -562,22 +563,19 @@ namespace rsx
 		}

 		template <typename ...Args>
-		bool flush_address(u32 address, Args&&... extras)
+		std::pair<bool, std::vector<section_storage_type*>> invalidate_address(u32 address, bool allow_flush, Args&... extras)
 		{
-			if (address < no_access_range.first ||
-				address > no_access_range.second)
-				return false;
-
-			writer_lock lock(m_cache_mutex);
-			return flush_address_impl(address, std::forward<Args>(extras)...);
+			return invalidate_range(address, 4096 - (address & 4095), false, allow_flush, std::forward<Args>(extras)...);
 		}

-		bool invalidate_address(u32 address)
+		template <typename ...Args>
+		std::pair<bool, std::vector<section_storage_type*>> flush_address(u32 address, Args&... extras)
 		{
-			return invalidate_range(address, 4096 - (address & 4095));
+			return invalidate_range(address, 4096 - (address & 4095), false, true, std::forward<Args>(extras)...);
 		}

-		bool invalidate_range(u32 address, u32 range, bool unprotect = true)
+		template <typename ...Args>
+		std::pair<bool, std::vector<section_storage_type*>> invalidate_range(u32 address, u32 range, bool discard, bool allow_flush, Args&... extras)
 		{
 			std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);

@@ -587,11 +585,31 @@ namespace rsx
 				//Doesnt fall in the read_only textures range; check render targets
 				if (trampled_range.second < no_access_range.first ||
 					trampled_range.first > no_access_range.second)
-					return false;
+					return{ false, {} };
 			}

 			writer_lock lock(m_cache_mutex);
-			return invalidate_range_impl(address, range, unprotect);
+			return invalidate_range_impl(address, range, discard, allow_flush, std::forward<Args>(extras)...);
+		}

+		template <typename ...Args>
+		bool flush_all(std::vector<section_storage_type*>& sections_to_flush, Args&... extras)
+		{
+			reader_lock lock(m_cache_mutex);
+			for (const auto &tex: sections_to_flush)
+			{
+				if (tex->is_flushed())
+					continue;
+
+				if (!tex->flush(std::forward<Args>(extras)...))
+				{
+					//Missed address, note this
+					//TODO: Lower severity when successful to keep the cache from overworking
+					record_cache_miss(*tex);
+				}
+			}
+
+			return true;
 		}

 		void record_cache_miss(section_storage_type &tex)
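Taken together, invalidate_address() and flush_all() split the old synchronous flush into collect-now, write-back-later. A hedged sketch of how a backend consumes the new return value (cache and fault_addr are placeholders; backend-specific extras elided):

	bool handled;
	std::vector<section_storage_type*> sections;
	std::tie(handled, sections) = cache.invalidate_address(fault_addr, /* allow_flush */ false);

	if (!handled)
		return false; // address is not managed by the cache

	if (!sections.empty())
		cache.flush_all(sections /*, backend extras */); // sections are already unlocked; flush from a safe thread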
@@ -670,6 +688,7 @@ namespace rsx
 				continue;

 			free_texture_section(tex);
+			m_texture_memory_in_use -= tex.get_section_size();
 		}
 	}
@@ -882,6 +901,7 @@ namespace rsx
 			auto subresources_layout = get_subresources_layout(tex);
 			auto remap_vector = tex.decoded_remap();

+			m_texture_memory_in_use += (tex_pitch * tex_height);
 			return upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
 				texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view();
 		}
@@ -972,8 +992,8 @@ namespace rsx
 			const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;

 			lock.upgrade();
-			flush_address_impl(src_address, std::forward<Args>(extras)...);
-			invalidate_range_impl(dst_address, memcpy_bytes_length, true);
+			invalidate_range_impl(src_address, memcpy_bytes_length, false, true, std::forward<Args>(extras)...);
+			invalidate_range_impl(dst_address, memcpy_bytes_length, false, true, std::forward<Args>(extras)...);
 			memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
 			return true;
 		}
@@ -1075,7 +1095,7 @@ namespace rsx
 			{
 				lock.upgrade();

-				flush_address_impl(src_address, std::forward<Args>(extras)...);
+				invalidate_range_impl(src_address, src.pitch * src.slice_h, false, true, std::forward<Args>(extras)...);

 				const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
 				std::vector<rsx_subresource_layout> subresource_layout;
@@ -1090,6 +1110,8 @@ namespace rsx
 				const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
 				vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format, texture_upload_context::blit_engine_src,
 					subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled, default_remap_vector)->get_raw_texture();
+
+				m_texture_memory_in_use += src.pitch * src.slice_h;
 			}
 		}
 		else
@@ -1145,7 +1167,7 @@ namespace rsx
 			if (format_mismatch)
 			{
 				lock.upgrade();
-				invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), true);
+				invalidate_range_impl(cached_dest->get_section_base(), cached_dest->get_section_size(), false, true, std::forward<Args>(extras)...);

 				dest_texture = 0;
 				cached_dest = nullptr;
@@ -1153,7 +1175,7 @@ namespace rsx
 			else if (invalidate_dst_range)
 			{
 				lock.upgrade();
-				invalidate_range_impl(dst_address, dst.pitch * dst.height, true);
+				invalidate_range_impl(dst_address, dst.pitch * dst.height, false, true, std::forward<Args>(extras)...);
 			}

 			//Validate clipping region
@@ -1187,6 +1209,8 @@ namespace rsx
 				gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d,
 				dst.swizzled? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order,
 				default_remap_vector)->get_raw_texture();
+
+			m_texture_memory_in_use += dst.pitch * dst_dimensions.height;
 			}

 			const f32 scale = rsx::get_resolution_scale();
@@ -1204,5 +1228,10 @@ namespace rsx
 		{
 			return m_unreleased_texture_objects;
 		}
+
+		const u32 get_texture_memory_in_use() const
+		{
+			return m_texture_memory_in_use;
+		}
 	};
 }
@@ -1167,7 +1167,9 @@ void GLGSRender::flip(int buffer)
 		m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us");

 		auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
+		auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
 		m_text_printer.print_text(0, 108, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
+		m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");
 	}

 	m_frame->flip(m_context);
@@ -1202,19 +1204,15 @@ u64 GLGSRender::timestamp() const

 bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 {
-	if (is_writing)
-		return m_gl_texture_cache.invalidate_address(address);
-	else
-	{
-		if (std::this_thread::get_id() != m_thread_id)
-		{
-			bool flushable;
-			gl::cached_texture_section* section_to_post;
-
-			std::tie(flushable, section_to_post) = m_gl_texture_cache.address_is_flushable(address);
-			if (!flushable) return false;
-
-			work_item &task = post_flush_request(address, section_to_post);
+	bool can_flush = (std::this_thread::get_id() != m_thread_id);
+	auto result = m_gl_texture_cache.invalidate_address(address, can_flush);
+
+	if (!result.first)
+		return false;
+
+	if (result.second.size() > 0)
+	{
+		work_item &task = post_flush_request(address, result.second);

 		vm::temporary_unlock();
 		{
@@ -1223,16 +1221,16 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 		}

 		task.received = true;
-		return task.result;
+		return true;
 	}

-	return m_gl_texture_cache.flush_address(address);
-	}
+	return false;
 }

 void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
 {
-	if (m_gl_texture_cache.invalidate_range(address_base, size, false))
+	//Discard all memory in that range without bothering with writeback (Force it for strict?)
+	if (std::get<0>(m_gl_texture_cache.invalidate_range(address_base, size, true, false)))
 		m_gl_texture_cache.purge_dirty();
 }

@@ -1249,20 +1247,7 @@ void GLGSRender::do_local_task()
 		if (q.processed) continue;

 		std::unique_lock<std::mutex> lock(q.guard_mutex);
-
-		//Check if the suggested section is valid
-		if (!q.section_to_flush->is_flushed())
-		{
-			m_gl_texture_cache.flush_address(q.address_to_flush);
-			q.result = true;
-		}
-		else
-		{
-			//Another thread has unlocked this memory region already
-			//Return success
-			q.result = true;
-		}
-
+		q.result = m_gl_texture_cache.flush_all(q.sections_to_flush);
 		q.processed = true;

 		//Notify thread waiting on this
@@ -1271,14 +1256,14 @@ void GLGSRender::do_local_task()
 	}
 }

-work_item& GLGSRender::post_flush_request(u32 address, gl::cached_texture_section *section)
+work_item& GLGSRender::post_flush_request(u32 address, std::vector<gl::cached_texture_section*>& sections)
 {
 	std::lock_guard<std::mutex> lock(queue_guard);

 	work_queue.emplace_back();
 	work_item &result = work_queue.back();
 	result.address_to_flush = address;
-	result.section_to_flush = section;
+	result.sections_to_flush = std::move(sections);
 	return result;
 }
@@ -28,7 +28,7 @@ struct work_item
 	std::mutex guard_mutex;

 	u32 address_to_flush = 0;
-	gl::cached_texture_section *section_to_flush = nullptr;
+	std::vector<gl::cached_texture_section*> sections_to_flush;

 	volatile bool processed = false;
 	volatile bool result = false;
@@ -428,7 +428,7 @@ public:
 	void set_viewport();

 	void synchronize_buffers();
-	work_item& post_flush_request(u32 address, gl::cached_texture_section *section);
+	work_item& post_flush_request(u32 address, std::vector<gl::cached_texture_section*>& sections);

 	bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
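The work_item round trip now moves the whole batch through the queue: the faulting thread posts the vector it got back from invalidate_address(), and the RSX thread answers the request with a single flush_all(). Schematic handoff, condensed from on_access_violation() and do_local_task() above (the condition-variable wait is elided just as in the hunks):

	// Faulting thread (not the RSX thread):
	work_item &task = post_flush_request(address, result.second); // std::move empties the source vector
	// ... block until task.processed is signalled ...
	task.received = true;

	// RSX thread (do_local_task):
	std::unique_lock<std::mutex> lock(q.guard_mutex);
	q.result = m_gl_texture_cache.flush_all(q.sections_to_flush);
	q.processed = true; // then notify the waiter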
@@ -403,7 +403,7 @@ void GLGSRender::read_buffers()
 		}
 		else
 		{
-			m_gl_texture_cache.invalidate_range(texaddr, range);
+			m_gl_texture_cache.invalidate_range(texaddr, range, false, true);

 			std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
 			color_buffer.read(buffer.get(), width, height, pitch);
@@ -583,13 +583,13 @@ VKGSRender::VKGSRender() : GSRender()
 	semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;

 	//VRAM allocation
-	m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, 0x400000);
+	m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
 	m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
-	m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000);
+	m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer");
 	m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
-	m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000);
+	m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
 	m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0));
-	m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000);
+	m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 0x400000);
 	m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));

 	for (auto &ctx : frame_context_storage)
@@ -739,24 +739,27 @@ VKGSRender::~VKGSRender()

 bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 {
-	if (is_writing)
-		return m_texture_cache.invalidate_address(address);
-	else
-	{
-		if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer)
-		{
-			bool flushable;
-			vk::cached_texture_section* section;
-
-			std::tie(flushable, section) = m_texture_cache.address_is_flushable(address);
-
-			if (!flushable)
-				return false;
+	std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+	auto result = m_texture_cache.invalidate_address(address, false, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+
+	if (!result.first)
+		return false;

-			const u64 sync_timestamp = section->get_sync_timestamp();
+	if (result.second.size() > 0)
+	{
 		const bool is_rsxthr = std::this_thread::get_id() == rsx_thread;
+		bool has_queue_ref = false;

-		if (section->is_synchronized())
+		u64 sync_timestamp = 0ull;
+		for (const auto& tex : result.second)
+			sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp());
+
+		if (!is_rsxthr)
+		{
+			vm::temporary_unlock();
+		}
+
+		if (sync_timestamp > 0)
 		{
 			//Wait for any cb submitted after the sync timestamp to finish
 			while (true)
@@ -794,8 +797,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 			}
 			else
 			{
-				//This region is buffered, but no previous sync point has been put in place to start sync efforts
-				//Just stall and get what we have at this point
 				if (!is_rsxthr)
 				{
 					{
@@ -812,32 +813,16 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 				_mm_pause();
 			}

-			std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
-			bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+				has_queue_ref = true;
+			}
+		}
+
+		m_texture_cache.flush_all(result.second, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+
+		if (has_queue_ref)
+		{
 			m_queued_threads--;
-			_mm_sfence();
-
-			return status;
 		}
-		else
-		{
-			//NOTE: If the rsx::thread is trampling its own data, we have an operation that should be moved to the GPU
-			//We should never interrupt our own cb recording since some operations are not interruptible
-			if (!vk::is_uninterruptible())
-				//TODO: Investigate driver behaviour to determine if we need a hard sync or a soft flush
-				flush_command_queue();
-		}
-	}
-	else
-	{
-		//If we aren't managing buffer sync, dont bother checking the cache
-		return false;
-	}
-
-	std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
-	return m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
 	}

 	return false;
@@ -845,9 +830,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)

 void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
 {
-	if (m_texture_cache.invalidate_range(address_base, size, false))
+	std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+	if (std::get<0>(m_texture_cache.invalidate_range(address_base, size, false, false,
+		*m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue())))
+	{
 		m_texture_cache.purge_dirty();
+	}
 }

 void VKGSRender::begin()
 {
@@ -2651,7 +2640,9 @@ void VKGSRender::flip(int buffer)
 		m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");

 		auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
+		auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
 		m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 126, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
+		m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");

 		vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres);
 		m_framebuffers_to_clean.push_back(std::move(direct_fbo));
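Worth tracing, since the Vulkan hunk is large: the handler always collects with allow_flush = false, computes the newest sync point across the returned sections, waits for any command buffer submitted after it, and only then writes everything back in one go. Condensed control flow (error handling and the wait loop elided; the backend argument list abbreviated):

	std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
	auto result = m_texture_cache.invalidate_address(address, false /* collect only */, /* backend args */);

	if (!result.first)
		return false; // page not managed by the cache

	u64 sync_timestamp = 0ull; // newest sync point across the whole batch
	for (const auto& tex : result.second)
		sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp());

	// if sync_timestamp > 0: wait out any cb submitted after that timestamp, then
	m_texture_cache.flush_all(result.second, /* backend args */); // single writeback for all sections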
@@ -347,6 +347,7 @@ namespace vk

 			m_discardable_storage.clear();
 			m_unreleased_texture_objects = 0;
+			m_texture_memory_in_use = 0;
 		}

 	protected:
@@ -707,12 +708,13 @@ namespace vk
 			}
 			helper(&cmd);

-			return upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, *m_device, cmd, m_memory_types, m_submit_queue);
+			const VkQueue& queue = m_submit_queue;
+			return upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, *m_device, cmd, m_memory_types, queue);
 		}

 		const u32 get_unreleased_textures_count() const override
 		{
-			return std::max(m_unreleased_texture_objects, 0) + (u32)m_discardable_storage.size();
+			return m_unreleased_texture_objects + (u32)m_discardable_storage.size();
 		}
 	};
 }
@@ -238,6 +238,11 @@ namespace rsx

 			return std::make_pair(min, max);
 		}
+
+		utils::protection get_protection()
+		{
+			return protection;
+		}
 	};

 	template <typename pipeline_storage_type, typename backend_storage>