rsx/gl/vk: Invalidate texture regions when memory is unmapped

- Free GPU resources immediately if mappings change, to avoid leaking VRAM

Parent: 00b0311c86
Commit: 1da732bbf5

11 changed files with 128 additions and 54 deletions
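The change connects the memory manager's unmap path to the RSX backends: when a mapping is released, vm notifies the render thread, the texture cache invalidates any cached sections overlapping the unmapped range without restoring page protection (the pages no longer exist), and the GPU objects those sections held are freed at once. A minimal sketch of that flow, using simplified stand-in types rather than the real gl/vk cache classes shown in the hunks below:

    #include <cstdint>
    #include <vector>

    using u32 = std::uint32_t;

    // Hypothetical stand-in for a cached texture section.
    struct section
    {
        u32 base = 0, length = 0;
        bool dirty = false;

        bool overlaps(u32 addr, u32 len) const
        {
            return addr < base + length && base < addr + len;
        }
        void discard() { dirty = true; }  // mapping is gone; abandon the data
        void destroy() { /* release the GPU texture/PBO backing the section */ }
    };

    struct texture_cache
    {
        std::vector<section> sections;

        // Mirrors invalidate_range(addr, size, unprotect): with unprotect ==
        // false the backing pages are gone, so sections are discarded instead
        // of being unprotected for a later flush.
        bool invalidate_range(u32 addr, u32 len, bool unprotect)
        {
            bool hit = false;
            for (auto& s : sections)
            {
                if (s.dirty || !s.overlaps(addr, len))
                    continue;

                if (unprotect)
                    s.dirty = true;  // keep contents; protection restored elsewhere
                else
                    s.discard();

                hit = true;
            }
            return hit;
        }

        // Mirrors the GL cache's purge_dirty() and the VK cache's flush(true):
        // reclaim GPU memory still held by dirty sections.
        void purge_dirty()
        {
            for (auto& s : sections)
                if (s.dirty)
                    s.destroy();
        }
    };

    // The shape of the new vm -> renderer hook:
    void on_notify_memory_unmapped(texture_cache& cache, u32 addr, u32 size)
    {
        if (cache.invalidate_range(addr, size, /*unprotect=*/false))
            cache.purge_dirty();
    }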
@@ -6,6 +6,7 @@
 #include "Utilities/VirtualMemory.h"
 #include "Emu/CPU/CPUThread.h"
 #include "Emu/Cell/lv2/sys_memory.h"
+#include "Emu/RSX/GSRender.h"

 #ifdef _WIN32
 #include <Windows.h>
@@ -656,6 +657,7 @@ namespace vm
     if (found != m_map.end())
     {
         const u32 size = found->second;
+        const auto rsxthr = fxm::get<GSRender>();

         // Remove entry
         m_map.erase(found);
@@ -663,6 +665,9 @@ namespace vm
         // Unmap "real" memory pages
         _page_unmap(addr, size);

+        // Notify rsx to invalidate range
+        if (rsxthr != nullptr) rsxthr->on_notify_memory_unmapped(addr, size);
+
         // Write supplementary info if necessary
         if (sup_out) *sup_out = m_sup[addr];
@@ -1133,6 +1133,12 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
     return m_gl_texture_cache.flush_section(address);
 }

+void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
+{
+    if (m_gl_texture_cache.invalidate_range(address_base, size, false))
+        m_gl_texture_cache.purge_dirty();
+}
+
 void GLGSRender::do_local_task()
 {
     std::lock_guard<std::mutex> lock(queue_guard);
@@ -426,6 +426,7 @@ protected:
     u32 get_zcull_stats(u32 type) override;

     bool on_access_violation(u32 address, bool is_writing) override;
+    void on_notify_memory_unmapped(u32 address_base, u32 size) override;

     virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
     virtual std::array<std::vector<gsl::byte>, 2> copy_depth_stencil_buffer_to_memory() override;
@@ -286,6 +286,10 @@ namespace gl

     void destroy()
     {
+        if (!locked && pbo_id == 0 && vram_texture == 0 && m_fence.is_empty())
+            //Already destroyed
+            return;
+
         if (locked)
             unprotect();
@@ -938,16 +942,21 @@ namespace gl
     }

     bool mark_as_dirty(u32 address)
+    {
+        return invalidate_range(address, 4096 - (address & 4095));
+    }
+
+    bool invalidate_range(u32 address, u32 size, bool unprotect=true)
     {
         bool response = false;
-        std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
+        std::pair<u32, u32> trampled_range = std::make_pair(address, address + size);

         //TODO: Optimize this function!
         //Multi-pass checking is slow. Pre-calculate dependency tree at section creation
         rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);

-        if (address >= read_only_range.first &&
-            address < read_only_range.second)
+        if (trampled_range.second >= read_only_range.first &&
+            trampled_range.first < read_only_range.second)
         {
             for (int i = 0; i < read_only_memory_sections.size(); ++i)
             {
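mark_as_dirty now delegates to invalidate_range, covering the span from the given address to the end of its 4 KiB page: 4096 - (address & 4095) is the byte count up to the next page boundary. For an illustrative address 0x30100F40: address & 4095 = 0xF40, so the size is 4096 - 0xF40 = 0xC0 bytes, i.e. the range [0x30100F40, 0x30101000).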
@@ -966,15 +975,24 @@ namespace gl
                 i = 0;
             }

-            tex.unprotect();
-            tex.set_dirty(true);
+            if (unprotect)
+            {
+                tex.unprotect();
+                tex.set_dirty(true);
+            }
+            else
+            {
+                //abandon memory
+                tex.discard();
+            }

             response = true;
         }
     }
 }

-if (address >= no_access_range.first &&
-    address < no_access_range.second)
+if (trampled_range.second >= no_access_range.first &&
+    trampled_range.first < no_access_range.second)
 {
     rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);

@@ -995,8 +1013,16 @@ namespace gl
                 i = 0;
             }

-            tex.unprotect();
-            tex.set_dirty(true);
+            if (unprotect)
+            {
+                tex.unprotect();
+                tex.set_dirty(true);
+            }
+            else
+            {
+                LOG_WARNING(RSX, "Framebuffer region 0x%X -> 0x%X is being discarded", tex.get_section_base(), tex.get_section_base() + tex.get_section_size());
+                tex.discard();
+            }

             response = true;
         }
@@ -1006,35 +1032,6 @@ namespace gl
     return response;
 }

-void invalidate_range(u32 base, u32 size)
-{
-    rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_section_mutex);
-    std::pair<u32, u32> range = std::make_pair(base, size);
-
-    if (base < read_only_range.second &&
-        (base + size) >= read_only_range.first)
-    {
-        for (cached_texture_section &tex : read_only_memory_sections)
-        {
-            if (!tex.is_dirty() && tex.overlaps(range))
-                tex.destroy();
-        }
-    }
-
-    if (base < no_access_range.second &&
-        (base + size) >= no_access_range.first)
-    {
-        for (cached_texture_section &tex : no_access_memory_sections)
-        {
-            if (!tex.is_dirty() && tex.overlaps(range))
-            {
-                tex.unprotect();
-                tex.set_dirty(true);
-            }
-        }
-    }
-}
-
 bool flush_section(u32 address);

 void clear_temporary_surfaces()
@@ -1047,6 +1044,19 @@ namespace gl
     m_temporary_surfaces.clear();
 }

+void purge_dirty()
+{
+    reader_lock lock(m_section_mutex);
+
+    for (cached_texture_section &tex : read_only_memory_sections)
+    {
+        if (tex.is_dirty())
+        {
+            tex.destroy();
+        }
+    }
+}
+
 bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts)
 {
     //Since we will have dst in vram, we can 'safely' ignore the swizzle flag
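Both caches guard section iteration with rsx::conditional_lock<shared_mutex>, keyed on in_access_violation_handler, so invalidation triggered from inside the access-violation handler does not try to re-acquire a mutex that path already holds. The guard's exact semantics are not part of this diff; a plausible reconstruction, as a sketch only:

    #include <atomic>
    #include <shared_mutex>

    // Hypothetical reconstruction of rsx::conditional_lock (the real one
    // lives elsewhere in the RPCS3 tree): acquire the mutex only when the
    // flag says no outer frame on this path holds it already.
    template <typename Mutex>
    class conditional_lock
    {
        Mutex& m_mutex;
        bool m_owned = false;

    public:
        conditional_lock(const std::atomic<bool>& already_locked, Mutex& mtx)
            : m_mutex(mtx)
        {
            if (!already_locked)
            {
                m_mutex.lock();
                m_owned = true;
            }
        }

        ~conditional_lock()
        {
            if (m_owned)
                m_mutex.unlock();
        }
    };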
@@ -973,7 +973,7 @@ namespace rsx

     if (vertex_push_buffers[index].size > 0)
     {
-        std::pair<u8, u32> volatile_range_info = std::make_pair(index, vertex_push_buffers[index].data.size() * (u32)sizeof(u32));
+        std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
         result.volatile_blocks.push_back(volatile_range_info);
         result.attribute_placement[index] = attribute_buffer_placement::transient;
         continue;
@@ -253,6 +253,7 @@ namespace rsx
     virtual void flip(int buffer) = 0;
     virtual u64 timestamp() const;
     virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
+    virtual void on_notify_memory_unmapped(u32 /*address_base*/, u32 /*size*/) {}

     //zcull
     virtual void notify_zcull_info_changed() {}
@@ -578,6 +578,9 @@ VKGSRender::VKGSRender() : GSRender()
     m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000);
     m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));

+    //Empty view to bind to buffer locations without data
+    m_null_buffer_view.reset(new vk::buffer_view(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
+
     m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats);

     std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
@@ -658,6 +661,7 @@ VKGSRender::~VKGSRender()
     vk::destroy_global_resources();

     //Data heaps/buffers
+    m_null_buffer_view.reset();
     m_index_buffer_ring_info.heap.reset();
     m_uniform_buffer_ring_info.heap.reset();
     m_attrib_ring_info.heap.reset();
@@ -791,6 +795,12 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
     return false;
 }

+void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
+{
+    if (m_texture_cache.invalidate_range(address_base, size, false))
+        m_texture_cache.flush(true);
+}
+
 void VKGSRender::begin()
 {
     rsx::thread::begin();
@@ -129,11 +129,12 @@ private:

     vk::render_device *m_device;
     vk::swap_chain* m_swap_chain;
-    //buffer
+
+    //buffer
     vk::vk_data_heap m_uniform_buffer_ring_info;
     vk::vk_data_heap m_index_buffer_ring_info;
     vk::vk_data_heap m_texture_upload_buffer_ring_info;
+    std::unique_ptr<vk::buffer_view> m_null_buffer_view;

     //Vulkan internals
     vk::command_pool m_command_buffer_pool;
@@ -263,4 +264,5 @@ protected:
     void do_local_task() override;

     bool on_access_violation(u32 address, bool is_writing) override;
+    void on_notify_memory_unmapped(u32 address_base, u32 size) override;
 };
@@ -837,17 +837,23 @@ namespace vk

     bool invalidate_address(u32 address)
     {
-        if (address < read_only_range.first ||
-            address > read_only_range.second)
+        return invalidate_range(address, 4096 - (address & 4095));
+    }
+
+    bool invalidate_range(u32 address, u32 range, bool unprotect=true)
+    {
+        std::pair<u32, u32> trampled_range = std::make_pair(address, address + range);
+
+        if (trampled_range.second < read_only_range.first ||
+            trampled_range.first > read_only_range.second)
         {
             //Doesnt fall in the read_only textures range; check render targets
-            if (address < no_access_range.first ||
-                address > no_access_range.second)
+            if (trampled_range.second < no_access_range.first ||
+                trampled_range.first > no_access_range.second)
                 return false;
         }

         bool response = false;
-        std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
         std::unordered_map<u32, bool> processed_ranges;

         rsx::conditional_lock<shared_mutex> lock(in_access_violation_handler, m_cache_mutex);
@@ -865,8 +871,7 @@ namespace vk
         const u32 lock_base = base & ~0xfff;
         const u32 lock_limit = align(range_data.max_range + base, 4096);

-        if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
-            (lock_base > address || lock_limit <= address))
+        if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second)
         {
             processed_ranges[base] = true;
             continue;
@@ -892,8 +897,15 @@ namespace vk
             range_reset = true;
         }

-        tex.set_dirty(true);
-        tex.unprotect();
+        if (unprotect)
+        {
+            tex.set_dirty(true);
+            tex.unprotect();
+        }
+        else
+        {
+            tex.discard();
+        }

         range_data.valid_count--;
         response = true;
@@ -912,8 +924,30 @@ namespace vk
     return response;
 }

-void flush()
+void flush(bool purge_dirty=false)
 {
+    if (purge_dirty)
+    {
+        //Reclaims all graphics memory consumed by dirty textures
+        for (auto &address_range : m_cache)
+        {
+            auto &range_data = address_range.second;
+            for (auto &tex : range_data.data)
+            {
+                if (!tex.is_dirty())
+                    continue;
+
+                if (tex.exists())
+                {
+                    m_dirty_textures.push_back(std::move(tex.get_texture()));
+                    m_temporary_image_view.push_back(std::move(tex.get_view()));
+                }

+                tex.release_dma_resources();
+            }
+        }
+    }
+
     m_image_views_to_purge.clear();
     m_images_to_purge.clear();
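Unlike the GL path, the Vulkan cache cannot destroy images inline: command buffers already submitted may still reference them. flush(true) therefore moves each dirty section's image and view into holding vectors (m_dirty_textures, m_temporary_image_view) and releases DMA staging resources, leaving actual destruction to the existing purge lists once the GPU is done. A sketch of that deferred-deletion pattern, with hypothetical placeholder types:

    #include <memory>
    #include <vector>

    struct image {};       // stands in for the vk::image wrapper
    struct image_view {};  // stands in for the vk::image_view wrapper

    struct deferred_deleter
    {
        // Objects parked here stay alive until the frame's fence signals.
        std::vector<std::unique_ptr<image>> images;
        std::vector<std::unique_ptr<image_view>> views;

        void park(std::unique_ptr<image> img, std::unique_ptr<image_view> view)
        {
            images.push_back(std::move(img));
            views.push_back(std::move(view));
        }

        // Call only after the GPU has finished with the parked objects.
        void reclaim()
        {
            views.clear();   // destroy views before the images they reference
            images.clear();
        }
    };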
@@ -254,8 +254,7 @@ VKGSRender::upload_vertex_data()
     }
     else
     {
-        m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
-        persistent_view = m_current_frame->buffer_views_to_clean.back()->value;
+        persistent_view = m_null_buffer_view->value;
     }

     if (required.second > 0)
@@ -267,8 +266,7 @@ VKGSRender::upload_vertex_data()
     }
     else
     {
-        m_current_frame->buffer_views_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, 0, 0));
-        volatile_view = m_current_frame->buffer_views_to_clean.back()->value;
+        volatile_view = m_null_buffer_view->value;
     }

     m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set);
@@ -115,6 +115,13 @@ namespace rsx
     locked = false;
 }

+void discard()
+{
+    protection = utils::protection::rw;
+    dirty = true;
+    locked = false;
+}
+
 bool overlaps(std::pair<u32, u32> range)
 {
     return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);