Texture cache cleanup, refactoring and fixes
commit 35139ebf5d
parent 8b3d1c2c91
17 changed files with 3209 additions and 1453 deletions
rpcs3/Emu/RSX/Common/texture_cache_checker.h (new file, 221 additions)
@@ -0,0 +1,221 @@
#pragma once

#include "../rsx_utils.h"

#ifdef TEXTURE_CACHE_DEBUG
namespace rsx {

	class tex_cache_checker_t {
		struct per_page_info_t {
			u8 prot = 0;
			u8 no = 0;
			u8 ro = 0;

			FORCE_INLINE utils::protection get_protection() const
			{
				return static_cast<utils::protection>(prot);
			}
			FORCE_INLINE void set_protection(utils::protection prot)
			{
				this->prot = static_cast<u8>(prot);
			}
			FORCE_INLINE void reset_refcount()
			{
				no = 0;
				ro = 0;
			}
			FORCE_INLINE u16 sum() const
			{
				return u16{ no } + ro;
			}
			FORCE_INLINE bool verify() const
			{
				const utils::protection prot = get_protection();
				switch (prot)
				{
				case utils::protection::no: return no > 0;
				case utils::protection::ro: return no == 0 && ro > 0;
				case utils::protection::rw: return no == 0 && ro == 0;
				default: ASSUME(0);
				}
			}
			FORCE_INLINE void add(utils::protection prot)
			{
				switch (prot)
				{
				case utils::protection::no: if (no++ == UINT8_MAX) fmt::throw_exception("add(protection::no) overflow with NO==%d", UINT8_MAX); return;
				case utils::protection::ro: if (ro++ == UINT8_MAX) fmt::throw_exception("add(protection::ro) overflow with RO==%d", UINT8_MAX); return;
				default: ASSUME(0);
				}
			}
			FORCE_INLINE void remove(utils::protection prot)
			{
				switch (prot)
				{
				case utils::protection::no: if (no-- == 0) fmt::throw_exception("remove(protection::no) overflow with NO==0"); return;
				case utils::protection::ro: if (ro-- == 0) fmt::throw_exception("remove(protection::ro) overflow with RO==0"); return;
				default: ASSUME(0);
				}
			}
		};
		static_assert(sizeof(per_page_info_t) <= 4, "page_info_elmnt must be less than 4-bytes in size");

		// 4GB memory space / 4096 bytes per page = 1048576 pages
		static constexpr size_t num_pages = 0x1'0000'0000 / 4096;
		per_page_info_t _info[num_pages];

		static constexpr size_t rsx_address_to_index(u32 address)
		{
			return (address / 4096);
		}

		static constexpr u32 index_to_rsx_address(size_t idx)
		{
			return static_cast<u32>(idx * 4096);
		}

		constexpr per_page_info_t* rsx_address_to_info_pointer(u32 address)
		{
			return &(_info[rsx_address_to_index(address)]);
		}

		constexpr const per_page_info_t* rsx_address_to_info_pointer(u32 address) const
		{
			return &(_info[rsx_address_to_index(address)]);
		}

		constexpr u32 info_pointer_to_address(const per_page_info_t* ptr) const
		{
			return index_to_rsx_address(static_cast<size_t>(ptr - _info));
		}

		std::string prot_to_str(utils::protection prot) const
		{
			switch (prot)
			{
			case utils::protection::no: return "NA";
			case utils::protection::ro: return "RO";
			case utils::protection::rw: return "RW";
			default: fmt::throw_exception("Unreachable " HERE);
			}
		}

	public:
		tex_cache_checker_t()
		{
			// Initialize array to all 0
			memset(&_info, 0, sizeof(_info));
		}
		static_assert(static_cast<u32>(utils::protection::rw) == 0, "utils::protection::rw must have value 0 for the above constructor to work");

		void set_protection(const address_range& range, utils::protection prot)
		{
			AUDIT(range.is_page_range());
			AUDIT(prot == utils::protection::no || prot == utils::protection::ro || prot == utils::protection::rw);

			for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
			{
				ptr->set_protection(prot);
			}
		}

		void discard(const address_range& range)
		{
			set_protection(range, utils::protection::rw);
		}

		void reset_refcount()
		{
			for (per_page_info_t* ptr = rsx_address_to_info_pointer(0); ptr <= rsx_address_to_info_pointer(0xFF'FF'FF'FF); ptr++)
			{
				ptr->reset_refcount();
			}
		}

		void add(const address_range& range, utils::protection prot)
		{
			AUDIT(range.is_page_range());
			AUDIT(prot == utils::protection::no || prot == utils::protection::ro);

			for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
			{
				ptr->add(prot);
			}
		}

		void remove(const address_range& range, utils::protection prot)
		{
			AUDIT(range.is_page_range());
			AUDIT(prot == utils::protection::no || prot == utils::protection::ro);

			for (per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
			{
				ptr->remove(prot);
			}
		}

		// Returns a lower bound on how many locked sections are known to be within the given range with each protection {NA, RO}
		// The assumption here is that the page in the given range with the largest number of refcounted sections represents the lower bound to how many there must be
		std::pair<u8, u8> get_minimum_number_of_sections(const address_range& range) const
		{
			AUDIT(range.is_page_range());

			u8 no = 0;
			u8 ro = 0;
			for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
			{
				no = std::max(no, ptr->no);
				ro = std::max(ro, ptr->ro);
			}

			return { no, ro };
		}

		void check_unprotected(const address_range& range, bool allow_ro = false, bool must_be_empty = true) const
		{
			AUDIT(range.is_page_range());
			for (const per_page_info_t* ptr = rsx_address_to_info_pointer(range.start); ptr <= rsx_address_to_info_pointer(range.end); ptr++)
			{
				const auto prot = ptr->get_protection();
				if (prot != utils::protection::rw && (!allow_ro || prot != utils::protection::ro))
				{
					const u32 addr = info_pointer_to_address(ptr);
					fmt::throw_exception("Page at addr=0x%8x should be RW%s: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? " or RO" : "", prot_to_str(prot), ptr->ro, ptr->no);
				}

				if (must_be_empty && (
					ptr->no > 0 ||
					(!allow_ro && ptr->ro > 0)
					))
				{
					const u32 addr = info_pointer_to_address(ptr);
					fmt::throw_exception("Page at addr=0x%8x should not have any NA%s sections: Prot=%s, RO=%d, NA=%d", addr, allow_ro ? " or RO" : "", prot_to_str(prot), ptr->ro, ptr->no);
				}
			}
		}

		void verify() const
		{
			for (size_t idx = 0; idx < num_pages; idx++)
			{
				auto& elmnt = _info[idx];
				if (!elmnt.verify())
				{
					const u32 addr = index_to_rsx_address(idx);
					const utils::protection prot = elmnt.get_protection();
					fmt::throw_exception("Protection verification failed at addr=0x%x: Prot=%s, RO=%d, NA=%d", addr, prot_to_str(prot), elmnt.ro, elmnt.no);
				}
			}
		}
	};

	extern tex_cache_checker_t tex_cache_checker;
}; // namespace rsx

#endif //TEXTURE_CACHE_DEBUG
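The checker above is a debug-only shadow of the texture cache's page protections: each 4K page records its current utils::protection plus two refcounts (no, ro) of locked sections covering it, and verify() asserts that the recorded protection always agrees with the refcounts. A minimal sketch of how a caller would be expected to keep the two in sync (hypothetical code, not part of this commit; utils::memory_protect and vm::base as used elsewhere in rpcs3):

#ifdef TEXTURE_CACHE_DEBUG
// Hypothetical caller: lock a page-aligned section read-only, then release it.
void lock_section_ro(const utils::address_range& range)
{
	utils::memory_protect(vm::base(range.start), range.length(), utils::protection::ro);
	rsx::tex_cache_checker.set_protection(range, utils::protection::ro); // mirror the real mprotect
	rsx::tex_cache_checker.add(range, utils::protection::ro);            // one more RO section on each page
}

void unlock_section_ro(const utils::address_range& range)
{
	rsx::tex_cache_checker.remove(range, utils::protection::ro);
	utils::memory_protect(vm::base(range.start), range.length(), utils::protection::rw);
	rsx::tex_cache_checker.set_protection(range, utils::protection::rw);
	rsx::tex_cache_checker.verify(); // every page's refcounts must agree with its protection again
}
#endif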
rpcs3/Emu/RSX/Common/texture_cache_utils.h (new file, 1544 additions)
File diff suppressed because it is too large
@@ -1593,12 +1593,12 @@ void GLGSRender::flip(int buffer)
 	gl::screen.bind();
 	glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
 
-	m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), "RSX Load: " + std::to_string(get_load()) + "%");
+	m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
-	m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), "draw calls: " + std::to_string(m_draw_calls));
+	m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", m_draw_calls));
-	m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), "draw call setup: " + std::to_string(m_begin_time) + "us");
+	m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", m_begin_time));
-	m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
+	m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", m_vertex_upload_time));
-	m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us");
+	m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", m_textures_upload_time));
-	m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
+	m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", m_draw_time));
 
 	const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
 	const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);

@@ -1606,9 +1606,9 @@ void GLGSRender::flip(int buffer)
 	const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
 	const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
 	const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
-	m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
+	m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
-	m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");
+	m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
-	m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
+	m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
 }
 
 	m_frame->flip(m_context);
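The overlay lines above switch from std::to_string concatenation to printf-style width specifiers so successive counters render as aligned columns. Illustrative behaviour with made-up values (rpcs3's fmt::format accepts printf-like %d specifiers):

// Illustrative values only.
fmt::format("RSX Load: %3d%%", 7);     // "RSX Load:   7%"  - value right-aligned to 3 characters
fmt::format("draw calls: %16d", 1234); // "draw calls:             1234"
// The fixed widths are what keep the overlay's columns lined up from frame to frame.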
@@ -1640,8 +1640,11 @@ void GLGSRender::flip(int buffer)
 
 bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 {
-	bool can_flush = (std::this_thread::get_id() == m_thread_id);
-	auto result = m_gl_texture_cache.invalidate_address(address, is_writing, can_flush);
+	const bool can_flush = (std::this_thread::get_id() == m_thread_id);
+	const rsx::invalidation_cause cause =
+		is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
+		           : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
+	auto result = m_gl_texture_cache.invalidate_address(address, cause);
 
 	if (!result.violation_handled)
 		return false;

@@ -1664,12 +1667,15 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 	return true;
 }
 
-void GLGSRender::on_invalidate_memory_range(u32 address_base, u32 size)
+void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
 {
 	//Discard all memory in that range without bothering with writeback (Force it for strict?)
-	if (m_gl_texture_cache.invalidate_range(address_base, size, true, true, false).violation_handled)
+	auto data = std::move(m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap));
+	AUDIT(data.empty());
+
+	if (data.violation_handled)
 	{
-		m_gl_texture_cache.purge_dirty();
+		m_gl_texture_cache.purge_unreleased_sections();
 		{
 			std::lock_guard lock(m_sampler_mutex);
 			m_samplers_dirty.store(true);
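Both is_writing and the can_flush flag are folded into a single rsx::invalidation_cause value. The enum itself is defined in the new texture_cache_utils.h (whose diff is suppressed above), so only its use is visible here; the GL mapping amounts to this, with semantics inferred from how can_flush was used before:

// write / read                   -> fault raised on the RSX thread itself; sections can be flushed inline
// deferred_write / deferred_read -> fault raised on another thread; the flush is deferred to the RSX thread
const rsx::invalidation_cause cause =
	is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
	           : (can_flush ? rsx::invalidation_cause::read  : rsx::invalidation_cause::deferred_read);

The Vulkan backend (further down) always passes the deferred_* causes, matching the `false` it previously passed for can_flush.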
@@ -390,7 +390,7 @@ protected:
 	void do_local_task(rsx::FIFO_state state) override;
 
 	bool on_access_violation(u32 address, bool is_writing) override;
-	void on_invalidate_memory_range(u32 address_base, u32 size) override;
+	void on_invalidate_memory_range(const utils::address_range &range) override;
 	void notify_tile_unbound(u32 tile) override;
 
 	std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
@@ -237,8 +237,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 			old_format_found = true;
 		}
 
-		m_gl_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
-		m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height);
+		const utils::address_range surface_range = m_surface_info[i].get_memory_range();
+		m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
+		m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
 	}
 
 	if (std::get<0>(m_rtts.m_bound_render_targets[i]))

@@ -268,8 +269,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 		auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width;
 		auto old_format = (bpp == 2) ? gl::texture::format::depth : gl::texture::format::depth_stencil;
 
-		m_gl_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
-		m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height);
+		const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
+		m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
+		m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
 	}
 
 	auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);

@@ -381,8 +383,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 		{
 			if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
 
-			const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * layout.aa_factors[1];
-			m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
+			const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
+			m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
 				color_format.format, color_format.type, color_format.swap_bytes);
 		}
 	}

@@ -392,8 +394,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 	if (m_depth_surface_info.address && m_depth_surface_info.pitch)
 	{
 		const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
-		const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1];
-		m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
+		const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
+		m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
 			depth_format_gl.format, depth_format_gl.type, true);
 	}
 }

@@ -448,12 +450,11 @@ void GLGSRender::read_buffers()
 			if (!m_surface_info[i].pitch)
 				continue;
 
-			const u32 range = pitch * height;
-
 			rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
 			u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
 
-			bool success = m_gl_texture_cache.load_memory_from_cache(texaddr, pitch * height, std::get<1>(m_rtts.m_bound_render_targets[i]));
+			const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height);
+			bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i]));
 
 			//Fall back to slower methods if the image could not be fetched from cache.
 			if (!success)

@@ -464,7 +465,7 @@ void GLGSRender::read_buffers()
 				}
 				else
 				{
-					m_gl_texture_cache.invalidate_range(texaddr, range, false, false, true);
+					m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::read);
 
 					std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
 					color_buffer.read(buffer.get(), width, height, pitch);

@@ -512,8 +513,9 @@ void GLGSRender::read_buffers()
 		if (!pitch)
 			return;
 
-		u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
-		bool in_cache = m_gl_texture_cache.load_memory_from_cache(depth_address, pitch * height, std::get<1>(m_rtts.m_bound_depth_stencil));
+		const u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
+		const utils::address_range range = utils::address_range::start_length(depth_address, pitch * height);
+		bool in_cache = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_depth_stencil));
 
 		if (in_cache)
 			return;
@@ -141,8 +141,10 @@ namespace gl
 		}
 	};
 
-	class cached_texture_section : public rsx::cached_texture_section
+	class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section>
 	{
+		using baseclass = rsx::cached_texture_section<gl::cached_texture_section>;
+
 	private:
 		fence m_fence;
 		u32 pbo_id = 0;
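The section class now names itself as the template argument of its base, i.e. the curiously recurring template pattern (CRTP). The shared rsx:: code can then call back into backend-specific hooks (such as the on_section_resources_created/destroyed notifications used below) with static dispatch instead of virtual calls. A minimal sketch of the pattern with hypothetical names (the real classes live in the suppressed texture_cache_utils.h):

template <typename Derived>
class buffered_section_base
{
public:
	void destroy()
	{
		// ...shared bookkeeping: unprotect pages, update global counters...
		static_cast<Derived*>(this)->release_resources(); // bound at compile time, no vtable needed
	}
};

class gl_section : public buffered_section_base<gl_section>
{
public:
	void release_resources() { /* delete PBO, fence and texture handles */ }
};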
@@ -226,7 +228,7 @@ namespace gl
 		void init_buffer()
 		{
 			const f32 resolution_scale = (context == rsx::texture_upload_context::framebuffer_storage? rsx::get_resolution_scale() : 1.f);
-			const u32 real_buffer_size = (resolution_scale <= 1.f) ? cpu_address_range : (u32)(resolution_scale * resolution_scale * cpu_address_range);
+			const u32 real_buffer_size = (resolution_scale <= 1.f) ? get_section_size() : (u32)(resolution_scale * resolution_scale * get_section_size());
 			const u32 buffer_size = align(real_buffer_size, 4096);
 
 			if (pbo_id)

@@ -249,13 +251,14 @@ namespace gl
 		}
 
 	public:
+		using baseclass::cached_texture_section;
+
-		void reset(u32 base, u32 size, bool /*flushable*/=false)
+		void reset(const utils::address_range &memory_range)
 		{
-			rsx::cached_texture_section::reset(base, size);
-
 			vram_texture = nullptr;
 			managed_texture.reset();
+
+			baseclass::reset(memory_range);
 		}
 
 		void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only,

@@ -283,7 +286,7 @@ namespace gl
 			if (rsx_pitch > 0)
 				this->rsx_pitch = rsx_pitch;
 			else
-				this->rsx_pitch = cpu_address_range / height;
+				this->rsx_pitch = get_section_size() / height;
 
 			this->width = w;
 			this->height = h;

@@ -292,6 +295,9 @@ namespace gl
 			this->mipmaps = mipmaps;
 
 			set_format(gl_format, gl_type, swap_bytes);
+
+			// Notify baseclass
+			baseclass::on_section_resources_created();
 		}
 
 		void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps)

@@ -307,6 +313,9 @@ namespace gl
 
 			rsx_pitch = 0;
 			real_pitch = 0;
+
+			// Notify baseclass
+			baseclass::on_section_resources_created();
 		}
 
 		void make_flushable()

@@ -458,11 +467,12 @@ namespace gl
 		bool flush()
 		{
 			if (flushed) return true; //Already written, ignore
+			AUDIT( is_locked() );
 
 			bool result = true;
 			if (!synchronized)
 			{
-				LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
+				LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
 				copy_texture();
 
 				if (!synchronized)

@@ -480,11 +490,14 @@ namespace gl
 			m_fence.wait_for_signal();
 			flushed = true;
 
-			const auto valid_range = get_confirmed_range();
-			void *dst = get_raw_ptr(valid_range.first, true);
+			const auto valid_range = get_confirmed_range_delta();
+			const u32 valid_offset = valid_range.first;
+			const u32 valid_length = valid_range.second;
+			AUDIT( valid_length > 0 );
+
+			void *dst = get_ptr_by_offset(valid_range.first, true);
 			glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
-			void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
+			void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_offset, valid_length, GL_MAP_READ_BIT);
 
 			//throw if map failed since we'll segfault anyway
 			verify(HERE), src != nullptr;

@@ -496,20 +509,20 @@ namespace gl
 				require_manual_shuffle = true;
 			}
 
-			if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
+			if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
 			{
-				memcpy(dst, src, valid_range.second);
+				memcpy(dst, src, valid_length);
 			}
 			else
 			{
-				if (valid_range.second % rsx_pitch)
+				if (valid_length % rsx_pitch)
 				{
 					fmt::throw_exception("Unreachable" HERE);
 				}
 
 				u8 *_src = (u8*)src;
 				u8 *_dst = (u8*)dst;
-				const auto num_rows = valid_range.second / rsx_pitch;
+				const auto num_rows = valid_length / rsx_pitch;
 				for (u32 row = 0; row < num_rows; ++row)
 				{
 					memcpy(_dst, _src, real_pitch);

@@ -521,7 +534,7 @@ namespace gl
 			if (require_manual_shuffle)
 			{
 				//byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
-				rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, valid_range.second / rsx_pitch);
+				rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, valid_length / rsx_pitch);
 			}
 			else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
 			{

@@ -537,7 +550,7 @@ namespace gl
 				case texture::type::ushort_1_5_5_5_rev:
 				case texture::type::ushort_5_5_5_1:
 				{
-					const u32 num_reps = valid_range.second / 2;
+					const u32 num_reps = valid_length / 2;
 					be_t<u16>* in = (be_t<u16>*)(dst);
 					u16* out = (u16*)dst;

@@ -556,7 +569,7 @@ namespace gl
 				case texture::type::uint_2_10_10_10_rev:
 				case texture::type::uint_8_8_8_8:
 				{
-					u32 num_reps = valid_range.second / 4;
+					u32 num_reps = valid_length / 4;
 					be_t<u32>* in = (be_t<u32>*)(dst);
 					u32* out = (u32*)dst;

@@ -575,7 +588,7 @@ namespace gl
 				}
 			}
 
-			flush_io(valid_range.first, valid_range.second);
+			flush_ptr_by_offset(valid_offset, valid_length);
 			glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
 			glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
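flush() now writes back only the confirmed dirty window: get_confirmed_range_delta() returns an (offset, length) pair relative to the section base, and when the packed GPU pitch is smaller than the guest pitch the copy proceeds row by row. A worked example of the arithmetic above, with made-up numbers (the pointer advances fall just outside the hunk shown, but follow from the two pitches):

// Illustrative values only.
const u32 rsx_pitch    = 256;  // guest row stride in bytes
const u32 real_pitch   = 128;  // tightly packed row as produced by the GPU
const u32 valid_length = 1024; // confirmed dirty bytes; must be a multiple of rsx_pitch

const u32 num_rows = valid_length / rsx_pitch; // = 4 rows
// Each iteration copies real_pitch (128) bytes; src advances by real_pitch and
// dst by rsx_pitch, so the 128 bytes of per-row guest padding stay untouched.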
@@ -586,13 +599,10 @@ namespace gl
 
 		void destroy()
 		{
-			if (!locked && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty())
+			if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty())
 				//Already destroyed
 				return;
 
-			if (locked)
-				unprotect();
-
 			if (pbo_id == 0)
 			{
 				//Read-only texture, destroy texture memory

@@ -611,6 +621,13 @@ namespace gl
 
 			if (!m_fence.is_empty())
 				m_fence.destroy();
+
+			baseclass::on_section_resources_destroyed();
+		}
+
+		inline bool exists() const
+		{
+			return (vram_texture != nullptr);
 		}
 
 		texture::format get_format() const

@@ -618,16 +635,6 @@ namespace gl
 			return format;
 		}
 
-		bool exists() const
-		{
-			return vram_texture != nullptr;
-		}
-
-		bool is_flushable() const
-		{
-			return (protection == utils::protection::no);
-		}
-
 		bool is_flushed() const
 		{
 			return flushed;

@@ -683,9 +690,10 @@ namespace gl
 		}
 	};
 
-	class texture_cache : public rsx::texture_cache<void*, cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>
+	class texture_cache : public rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>
 	{
 	private:
+		using baseclass = rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>;
 
 		struct discardable_storage
 		{

@@ -717,30 +725,10 @@ namespace gl
 		blitter m_hw_blitter;
 		std::vector<discardable_storage> m_temporary_surfaces;
 
-		cached_texture_section& create_texture(gl::viewable_image* image, u32 texaddr, u32 texsize, u32 w, u32 h, u32 depth, u32 mipmaps)
-		{
-			cached_texture_section& tex = find_cached_texture(texaddr, texsize, true, w, h, depth);
-			tex.reset(texaddr, texsize, false);
-			tex.create_read_only(image, w, h, depth, mipmaps);
-			read_only_range = tex.get_min_max(read_only_range);
-			return tex;
-		}
-
 		void clear()
 		{
-			for (auto &address_range : m_cache)
-			{
-				auto &range_data = address_range.second;
-				for (auto &tex : range_data.data)
-				{
-					tex.destroy();
-				}
-
-				range_data.data.resize(0);
-			}
-
+			baseclass::clear();
 			clear_temporary_subresources();
-			m_unreleased_texture_objects = 0;
 		}
 
 		void clear_temporary_subresources()

@@ -850,11 +838,6 @@ namespace gl
 
 	protected:
 
-		void free_texture_section(cached_texture_section& tex) override
-		{
-			tex.destroy();
-		}
-
 		gl::texture_view* create_temporary_subresource_view(void*&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
 			const texture_channel_remap_t& remap_vector) override
 		{

@@ -946,7 +929,7 @@ namespace gl
 				dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
 		}
 
-		cached_texture_section* create_new_texture(void*&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
+		cached_texture_section* create_new_texture(void*&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
 			rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override
 		{
 			auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type);

@@ -954,15 +937,23 @@ namespace gl
 			const auto swizzle = get_component_mapping(gcm_format, flags);
 			image->set_native_component_layout(swizzle);
 
-			auto& cached = create_texture(image, rsx_address, rsx_size, width, height, depth, mipmaps);
-			cached.set_dirty(false);
+			auto& cached = *find_cached_texture(rsx_range, true, true, width, width, depth, mipmaps);
+			ASSERT(!cached.is_locked());
+
+			// Prepare section
+			cached.reset(rsx_range);
 			cached.set_view_flags(flags);
 			cached.set_context(context);
-			cached.set_gcm_format(gcm_format);
 			cached.set_image_type(type);
+			cached.set_gcm_format(gcm_format);
+
+			cached.create_read_only(image, width, height, depth, mipmaps);
+			cached.set_dirty(false);
 
 			if (context != rsx::texture_upload_context::blit_engine_dst)
 			{
+				AUDIT( cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always );
+				read_only_range = cached.get_min_max(read_only_range, rsx::section_bounds::locked_range); // TODO ruipin: This was outside the if, but is inside the if in Vulkan. Ask kd-11
 				cached.protect(utils::protection::ro);
 			}
 			else

@@ -998,8 +989,8 @@ namespace gl
 
 			//NOTE: Protection is handled by the caller
 			cached.make_flushable();
-			cached.set_dimensions(width, height, depth, (rsx_size / height));
+			cached.set_dimensions(width, height, depth, (rsx_range.length() / height));
-			no_access_range = cached.get_min_max(no_access_range);
+			no_access_range = cached.get_min_max(no_access_range, rsx::section_bounds::locked_range);
 		}
 
 		update_cache_tag();

@@ -1010,7 +1001,8 @@ namespace gl
 			rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override
 		{
 			void* unused = nullptr;
-			auto section = create_new_texture(unused, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type,
+			const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height);
+			auto section = create_new_texture(unused, rsx_range, width, height, depth, mipmaps, gcm_format, context, type,
 				rsx::texture_create_flags::default_component_order);
 
 			gl::upload_texture(section->get_raw_texture()->id(), rsx_address, gcm_format, width, height, depth, mipmaps,

@@ -1082,9 +1074,7 @@ namespace gl
 
 	public:
 
-		texture_cache() {}
-
-		~texture_cache() {}
+		using baseclass::texture_cache;
 
 		void initialize()
 		{

@@ -1103,19 +1093,17 @@ namespace gl
 		{
 			reader_lock lock(m_cache_mutex);
 
-			auto found = m_cache.find(get_block_address(rsx_address));
-			if (found == m_cache.end())
+			auto &block = m_storage.block_for(rsx_address);
+
+			if (block.get_locked_count() == 0)
 				return false;
 
-			//if (found->second.valid_count == 0)
-				//return false;
-
-			for (auto& tex : found->second.data)
+			for (auto& tex : block)
 			{
 				if (tex.is_dirty())
 					continue;
 
-				if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
+				if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range))
 					continue;
 
 				if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())

@@ -1127,9 +1115,9 @@ namespace gl
 
 		void on_frame_end() override
 		{
-			if (m_unreleased_texture_objects >= m_max_zombie_objects)
+			if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects)
 			{
-				purge_dirty();
+				purge_unreleased_sections();
 			}
 
 			clear_temporary_subresources();

@@ -1158,7 +1146,7 @@ namespace gl
 					gl::texture::format::depth_stencil : gl::texture::format::depth;
 			}
 
-			flush_if_cache_miss_likely(fmt, result.real_dst_address, result.real_dst_size);
+			flush_if_cache_miss_likely(fmt, result.to_address_range());
 		}
 
 		return true;
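The lookup hunk above (@@ -1103) replaces the open-coded m_cache map walk with the new ranged storage: m_storage.block_for(address) returns the block covering an address, blocks are directly iterable, and a per-block locked-section counter allows an early reject before touching any section. Under the contract implied by this diff (the block type itself is defined in the suppressed texture_cache_utils.h):

auto &block = m_storage.block_for(rsx_address); // bucket covering this address
if (block.get_locked_count() == 0)
	return false;                               // fast path: nothing protected in this block

for (auto& tex : block)                         // iterate only this block's sections
{
	if (tex.is_dirty())
		continue;
	// ...same overlap and bounds tests as in the hunk above...
}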
@@ -1350,34 +1350,12 @@ namespace rsx
 	{
 		if (!in_begin_end && state != FIFO_state::lock_wait)
 		{
-			if (!m_invalidated_memory_ranges.empty())
-			{
-				std::lock_guard lock(m_mtx_task);
+			reader_lock lock(m_mtx_task);
 
-				for (const auto& range : m_invalidated_memory_ranges)
-				{
-					on_invalidate_memory_range(range.first, range.second);
-
-					// Clean the main memory super_ptr cache if invalidated
-					const auto range_end = range.first + range.second;
-					for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();)
-					{
-						const auto mem_start = It->first;
-						const auto mem_end = mem_start + It->second.size();
-						const bool overlaps = (mem_start < range_end && range.first < mem_end);
-
-						if (overlaps)
-						{
-							It = main_super_memory_block.erase(It);
-						}
-						else
-						{
-							It++;
-						}
-					}
-				}
-
-				m_invalidated_memory_ranges.clear();
+			if (m_invalidated_memory_range.valid())
+			{
+				lock.upgrade();
+				handle_invalidated_memory_range();
 			}
 		}
 	}

@@ -2676,15 +2654,32 @@ namespace rsx
 
 	void thread::on_notify_memory_mapped(u32 address, u32 size)
 	{
-		// TODO
+		// In the case where an unmap is followed shortly after by a remap of the same address space
+		// we must block until RSX has invalidated the memory
+		// or lock m_mtx_task and do it ourselves
+
+		if (m_rsx_thread_exiting)
+			return;
+
+		reader_lock lock(m_mtx_task);
+
+		const auto map_range = address_range::start_length(address, size);
+
+		if (!m_invalidated_memory_range.valid())
+			return;
+
+		if (m_invalidated_memory_range.overlaps(map_range))
+		{
+			lock.upgrade();
+			handle_invalidated_memory_range();
+		}
 	}
 
-	void thread::on_notify_memory_unmapped(u32 base_address, u32 size)
+	void thread::on_notify_memory_unmapped(u32 address, u32 size)
 	{
-		if (!m_rsx_thread_exiting && base_address < 0xC0000000)
+		if (!m_rsx_thread_exiting && address < 0xC0000000)
 		{
-			u32 ea = base_address >> 20, io = RSXIOMem.io[ea];
+			u32 ea = address >> 20, io = RSXIOMem.io[ea];
 
 			if (io < 512)
 			{

@@ -2704,9 +2699,54 @@ namespace rsx
 				}
 			}
 
+			// Queue up memory invalidation
 			std::lock_guard lock(m_mtx_task);
-			m_invalidated_memory_ranges.push_back({ base_address, size });
+			const bool existing_range_valid = m_invalidated_memory_range.valid();
+			const auto unmap_range = address_range::start_length(address, size);
+
+			if (existing_range_valid && m_invalidated_memory_range.touches(unmap_range))
+			{
+				// Merge range-to-invalidate in case of consecutive unmaps
+				m_invalidated_memory_range.set_min_max(unmap_range);
+			}
+			else
+			{
+				if (existing_range_valid)
+				{
+					// We can only delay consecutive unmaps.
+					// Otherwise, to avoid VirtualProtect failures, we need to do the invalidation here
+					handle_invalidated_memory_range();
+				}
+
+				m_invalidated_memory_range = unmap_range;
+			}
 		}
 	}
 
+	// NOTE: m_mtx_task lock must be acquired before calling this method
+	void thread::handle_invalidated_memory_range()
+	{
+		if (!m_invalidated_memory_range.valid())
+			return;
+
+		on_invalidate_memory_range(m_invalidated_memory_range);
+
+		// Clean the main memory super_ptr cache if invalidated
+		for (auto It = main_super_memory_block.begin(); It != main_super_memory_block.end();)
+		{
+			const auto block_range = address_range::start_length(It->first, It->second.size());
+
+			if (m_invalidated_memory_range.overlaps(block_range))
+			{
+				It = main_super_memory_block.erase(It);
+			}
+			else
+			{
+				It++;
+			}
+		}
+
+		m_invalidated_memory_range.invalidate();
+	}
 
 	//Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
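The unmap path now tracks at most one pending invalidation: an unmap that touches the pending range (overlapping, or adjacent in the "consecutive unmaps" case the comment names) is merged into it via set_min_max, while a disjoint unmap forces the pending range to be handled immediately, since delaying it could leave stale page protections and fail a later VirtualProtect. A behavioural sketch of the merge, assuming the inclusive start/end bounds utils::address_range uses elsewhere in this commit:

// Made-up addresses; start_length(start, len) covers [start, start + len - 1].
address_range pending = address_range::start_length(0x10000, 0x2000); // [0x10000, 0x11FFF]
address_range unmap   = address_range::start_length(0x12000, 0x1000); // [0x12000, 0x12FFF]

if (pending.valid() && pending.touches(unmap)) // immediately adjacent here
{
	pending.set_min_max(unmap);                // pending widens to [0x10000, 0x12FFF]
}
else
{
	// disjoint: handle_invalidated_memory_range() first, then pending = unmap
}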
@@ -347,7 +347,7 @@ namespace rsx
 		std::shared_ptr<rsx::overlays::display_manager> m_overlay_manager;
 
 		// Invalidated memory range
-		std::vector<std::pair<u32, u32>> m_invalidated_memory_ranges;
+		address_range m_invalidated_memory_range;
 
 	public:
 		RsxDmaControl* ctrl = nullptr;

@@ -468,6 +468,8 @@ namespace rsx
 		thread();
 		virtual ~thread();
 
+		void handle_invalidated_memory_range();
+
 		virtual void on_task() override;
 		virtual void on_exit() override;

@@ -495,7 +497,7 @@ namespace rsx
 		virtual void flip(int buffer) = 0;
 		virtual u64 timestamp();
 		virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
-		virtual void on_invalidate_memory_range(u32 /*address*/, u32 /*range*/) {}
+		virtual void on_invalidate_memory_range(const address_range & /*range*/) {}
 		virtual void notify_tile_unbound(u32 /*tile*/) {}
 
 		// zcull
@ -812,7 +812,9 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||||
vk::texture_cache::thrashed_set result;
|
vk::texture_cache::thrashed_set result;
|
||||||
{
|
{
|
||||||
std::lock_guard lock(m_secondary_cb_guard);
|
std::lock_guard lock(m_secondary_cb_guard);
|
||||||
result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
|
|
||||||
|
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
|
||||||
|
result = std::move(m_texture_cache.invalidate_address(address, cause, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!result.violation_handled)
|
if (!result.violation_handled)
|
||||||
|
@ -893,13 +895,16 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKGSRender::on_invalidate_memory_range(u32 address_base, u32 size)
|
void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
|
||||||
{
|
{
|
||||||
std::lock_guard lock(m_secondary_cb_guard);
|
std::lock_guard lock(m_secondary_cb_guard);
|
||||||
if (m_texture_cache.invalidate_range(address_base, size, true, true, false,
|
|
||||||
m_secondary_command_buffer, m_swapchain->get_graphics_queue()).violation_handled)
|
auto data = std::move(m_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
|
||||||
|
AUDIT(data.empty());
|
||||||
|
|
||||||
|
if (data.violation_handled)
|
||||||
{
|
{
|
||||||
m_texture_cache.purge_dirty();
|
m_texture_cache.purge_unreleased_sections();
|
||||||
{
|
{
|
||||||
std::lock_guard lock(m_sampler_mutex);
|
std::lock_guard lock(m_sampler_mutex);
|
||||||
m_samplers_dirty.store(true);
|
m_samplers_dirty.store(true);
|
||||||
|
@ -2625,9 +2630,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
if (old_format == VK_FORMAT_UNDEFINED)
|
if (old_format == VK_FORMAT_UNDEFINED)
|
||||||
old_format = vk::get_compatible_surface_format(m_surface_info[i].color_format).first;
|
old_format = vk::get_compatible_surface_format(m_surface_info[i].color_format).first;
|
||||||
|
|
||||||
m_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
|
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
|
||||||
m_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height,
|
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
|
||||||
*m_current_command_buffer, m_swapchain->get_graphics_queue());
|
m_texture_cache.flush_if_cache_miss_likely(old_format, rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
|
||||||
}
|
}
|
||||||
|
|
||||||
m_surface_info[i].address = m_surface_info[i].pitch = 0;
|
m_surface_info[i].address = m_surface_info[i].pitch = 0;
|
||||||
|
@ -2641,9 +2646,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
|
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
|
||||||
{
|
{
|
||||||
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
|
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
|
||||||
m_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
|
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
|
||||||
m_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
|
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
|
||||||
*m_current_command_buffer, m_swapchain->get_graphics_queue());
|
m_texture_cache.flush_if_cache_miss_likely(old_format, surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
|
||||||
}
|
}
|
||||||
|
|
||||||
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
|
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
|
||||||
|
@ -2697,8 +2702,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
{
|
{
|
||||||
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
||||||
|
|
||||||
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * layout.aa_factors[1];
|
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
|
||||||
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
|
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
|
||||||
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
|
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2708,8 +2713,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
|
||||||
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
||||||
{
|
{
|
||||||
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
|
||||||
const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height * layout.aa_factors[1];
|
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
|
||||||
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
|
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
|
||||||
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
|
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3129,13 +3134,13 @@ void VKGSRender::flip(int buffer)

 if (g_cfg.video.overlay)
 {
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "RSX Load: " + std::to_string(get_load()) + "%");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls));
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", m_draw_calls));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", m_setup_time));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", m_vertex_upload_time));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "texture upload time: " + std::to_string(m_textures_upload_time) + "us");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", m_textures_upload_time));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", m_draw_time));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", m_flip_time));

 const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
 const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);

@ -3144,10 +3149,10 @@ void VKGSRender::flip(int buffer)
 const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
 const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
 const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M");
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
-m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d misprediction(s), %d speculation(s))", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
+m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
 }

 vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres);
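Note: switching from string concatenation to fmt::format above is what keeps the overlay columns from jittering as the values change digit count; the printf-style width specifiers pad each number to a fixed field. A minimal standalone C++ sketch of the same effect (illustration only, not rpcs3 code):

#include <cstdio>

int main()
{
    // "%3d" right-aligns the value in a 3-character field; "%%" prints a literal '%'.
    const int load = 7, draw_calls = 1520;
    std::printf("RSX Load: %3d%%\n", load);        // -> "RSX Load:   7%"
    std::printf("draw calls: %17d\n", draw_calls); // -> right-aligned in 17 columns
}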
@ -433,7 +433,7 @@ protected:
 void notify_tile_unbound(u32 tile) override;

 bool on_access_violation(u32 address, bool is_writing) override;
-void on_invalidate_memory_range(u32 address_base, u32 size) override;
+void on_invalidate_memory_range(const utils::address_range &range) override;

 bool on_decompiler_task() override;
 };
@ -13,8 +13,10 @@ extern u64 get_system_time();

 namespace vk
 {
-class cached_texture_section : public rsx::cached_texture_section
+class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section>
 {
+using baseclass = typename rsx::cached_texture_section<vk::cached_texture_section>;
+
 std::unique_ptr<vk::viewable_image> managed_texture = nullptr;

 //DMA relevant data
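The base class here becomes a template over the derived type (the curiously recurring template pattern), so shared texture-cache code can reach backend-specific behaviour without virtual dispatch. A minimal sketch of the pattern, with illustrative names that are not the actual rpcs3 interfaces:

#include <cstdio>

template <typename Derived>
class section_base
{
public:
    void destroy()
    {
        // Statically dispatches into the backend-specific hook.
        static_cast<Derived*>(this)->on_destroy();
    }
};

class vk_section : public section_base<vk_section>
{
public:
    void on_destroy() { std::puts("release Vulkan resources"); }
};

int main()
{
    vk_section s;
    s.destroy(); // resolves at compile time, no vtable required
}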
@ -24,15 +26,14 @@ namespace vk
 std::unique_ptr<vk::buffer> dma_buffer;

 public:
+using baseclass::cached_texture_section;

-cached_texture_section() {}
-
-void reset(u32 base, u32 length)
+void reset(const utils::address_range &memory_range)
 {
-if (length > cpu_address_range)
+if (memory_range.length() > get_section_size())
 release_dma_resources();

-rsx::cached_texture_section::reset(base, length);
+baseclass::reset(memory_range);
 }

 void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image *image, u32 rsx_pitch, bool managed, u32 gcm_format, bool pack_swap_bytes = false)
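reset() now takes a single utils::address_range instead of a raw (base, length) pair, which removes a whole class of off-by-one errors at call sites. A hedged stand-in showing the semantics assumed throughout this diff (inclusive end, start_length constructor; the real type lives in Utilities/address_range.h):

#include <cassert>
#include <cstdint>

struct address_range_sketch
{
    uint32_t start = 1; // start > end encodes "invalid"
    uint32_t end = 0;

    static address_range_sketch start_length(uint32_t s, uint32_t len)
    {
        return { s, s + len - 1 }; // end is inclusive
    }
    uint32_t length() const { return end - start + 1; }
    bool valid() const { return start <= end; }
};

int main()
{
    const auto r = address_range_sketch::start_length(0xC0000000u, 0x1000u);
    assert(r.valid() && r.length() == 0x1000u && r.end == 0xC0000FFFu);
}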
@ -56,13 +57,16 @@ namespace vk
 if (rsx_pitch > 0)
 this->rsx_pitch = rsx_pitch;
 else
-this->rsx_pitch = cpu_address_range / height;
+this->rsx_pitch = get_section_size() / height;

 //Even if we are managing the same vram section, we cannot guarantee contents are static
-//The create method is only invoked when a new mangaged session is required
+//The create method is only invoked when a new managed session is required
 synchronized = false;
 flushed = false;
 sync_timestamp = 0ull;

+// Notify baseclass
+baseclass::on_section_resources_created();
 }

 void release_dma_resources()
@ -81,11 +85,14 @@ namespace vk

 void destroy()
 {
+m_tex_cache->on_section_destroyed(*this);
 vram_texture = nullptr;
 release_dma_resources();

+baseclass::on_section_resources_destroyed();
 }

-bool exists() const
+inline bool exists() const
 {
 return (vram_texture != nullptr);
 }
@ -115,12 +122,6 @@ namespace vk
 return vram_texture->info.format;
 }

-bool is_flushable() const
-{
-//This section is active and can be flushed to cpu
-return (protection == utils::protection::no);
-}
-
 bool is_flushed() const
 {
 //This memory section was flushable, but a flush has already removed protection
@ -144,7 +145,7 @@ namespace vk
 if (dma_buffer.get() == nullptr)
 {
 auto memory_type = m_device->get_memory_mapping().host_visible_coherent;
-dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
+dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
 }

 if (manage_cb_lifetime)
@ -246,18 +247,18 @@ namespace vk
 {
 verify (HERE), mem_target->value != dma_buffer->value;

-vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range,
+vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
 VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

-shuffle_kernel->run(cmd, mem_target, cpu_address_range);
+shuffle_kernel->run(cmd, mem_target, get_section_size());

-vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, cpu_address_range,
+vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

 VkBufferCopy copy = {};
-copy.size = cpu_address_range;
+copy.size = get_section_size();
 vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
 }
@ -282,6 +283,7 @@ namespace vk
 bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
 {
 if (flushed) return true;
+AUDIT( is_locked() );

 if (m_device == nullptr)
 {
|
||||||
|
|
||||||
if (!synchronized)
|
if (!synchronized)
|
||||||
{
|
{
|
||||||
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
|
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
|
||||||
copy_texture(true, cmd, submit_queue);
|
copy_texture(true, cmd, submit_queue);
|
||||||
result = false;
|
result = false;
|
||||||
}
|
}
|
||||||
|
@ -301,22 +303,26 @@ namespace vk
 verify(HERE), real_pitch > 0;
 flushed = true;

-const auto valid_range = get_confirmed_range();
-void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
-void* pixels_dst = get_raw_ptr(valid_range.first, true);
+const auto valid_range = get_confirmed_range_delta();
+const u32 valid_offset = valid_range.first;
+const u32 valid_length = valid_range.second;
+AUDIT( valid_length > 0 );

-if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
+void* pixels_src = dma_buffer->map(valid_offset, valid_length);
+void* pixels_dst = get_ptr_by_offset(valid_offset, true);
+
+if (real_pitch >= rsx_pitch || valid_length <= rsx_pitch)
 {
-memcpy(pixels_dst, pixels_src, valid_range.second);
+memcpy(pixels_dst, pixels_src, valid_length);
 }
 else
 {
-if (valid_range.second % rsx_pitch)
+if (valid_length % rsx_pitch)
 {
 fmt::throw_exception("Unreachable" HERE);
 }

-const u32 num_rows = valid_range.second / rsx_pitch;
+const u32 num_rows = valid_length / rsx_pitch;
 auto _src = (u8*)pixels_src;
 auto _dst = (u8*)pixels_dst;
@ -328,7 +334,7 @@ namespace vk
 }
 }

-flush_io(valid_range.first, valid_range.second);
+flush_ptr_by_offset(valid_offset, valid_length);
 dma_buffer->unmap();
 reset_write_statistics();
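When the GPU-side pitch differs from the RSX pitch, the flush path above copies row by row instead of in one block. A self-contained sketch of that loop, assuming packed source rows as the surrounding code implies:

#include <cstdint>
#include <cstring>

// Copy num_rows rows of real_pitch bytes each into a destination whose rows
// are rsx_pitch bytes apart; the padding at the end of each row is untouched.
static void copy_rows(uint8_t* dst, const uint8_t* src,
                      uint32_t num_rows, uint32_t real_pitch, uint32_t rsx_pitch)
{
    for (uint32_t row = 0; row < num_rows; ++row)
    {
        std::memcpy(dst, src, real_pitch);
        src += real_pitch; // source rows are tightly packed
        dst += rsx_pitch;  // destination rows are strided to the RSX pitch
    }
}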
@ -405,9 +411,18 @@ namespace vk
 }
 };

-class texture_cache : public rsx::texture_cache<vk::command_buffer, cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>
+class texture_cache : public rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>
 {
+public:
+virtual void on_section_destroyed(cached_texture_section& tex)
+{
+m_discarded_memory_size += tex.get_section_size();
+m_discardable_storage.push_back(tex);
+}
+
 private:
+using baseclass = rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>;

 //Vulkan internals
 vk::render_device* m_device;
 vk::memory_type_mapping m_memory_types;
@ -419,30 +434,11 @@ namespace vk
 std::list<discarded_storage> m_discardable_storage;
 std::atomic<u32> m_discarded_memory_size = { 0 };

-void purge_cache()
+void clear()
 {
-for (auto &address_range : m_cache)
-{
-auto &range_data = address_range.second;
-for (auto &tex : range_data.data)
-{
-if (tex.exists())
-{
-m_discardable_storage.push_back(tex);
-}
-
-if (tex.is_locked())
-tex.unprotect();
-
-tex.release_dma_resources();
-}
-
-range_data.data.resize(0);
-}
+baseclass::clear();

 m_discardable_storage.clear();
-m_unreleased_texture_objects = 0;
-m_texture_memory_in_use = 0;
 m_discarded_memory_size = 0;
 }
@ -486,14 +482,6 @@ namespace vk
 }

 protected:

-void free_texture_section(cached_texture_section& tex) override
-{
-m_discarded_memory_size += tex.get_section_size();
-m_discardable_storage.push_back(tex);
-tex.destroy();
-}
-
 vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,
 u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy)
 {
@ -776,7 +764,7 @@ namespace vk
 vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
 }

-cached_texture_section* create_new_texture(vk::command_buffer& cmd, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
+cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format,
 rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override
 {
 const u16 section_depth = depth;
@ -846,26 +834,30 @@ namespace vk

 change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_flags, 0, mipmaps, 0, layer });

-cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth);
-region.reset(rsx_address, rsx_size);
-region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format);
-region.set_dirty(false);
+cached_texture_section& region = *find_cached_texture(rsx_range, true, true, width, height, section_depth);
+ASSERT(!region.is_locked());
+
+// New section, we must prepare it
+region.reset(rsx_range);
 region.set_context(context);
 region.set_gcm_format(gcm_format);
 region.set_image_type(type);

+region.create(width, height, section_depth, mipmaps, image, 0, true, gcm_format);
+region.set_dirty(false);
+
 //Its not necessary to lock blit dst textures as they are just reused as necessary
 if (context != rsx::texture_upload_context::blit_engine_dst)
 {
 region.protect(utils::protection::ro);
-read_only_range = region.get_min_max(read_only_range);
+read_only_range = region.get_min_max(read_only_range, rsx::section_bounds::locked_range);
 }
 else
 {
 //TODO: Confirm byte swap patterns
 //NOTE: Protection is handled by the caller
 region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT);
-no_access_range = region.get_min_max(no_access_range);
+no_access_range = region.get_min_max(no_access_range, rsx::section_bounds::locked_range);
 }

 update_cache_tag();
@ -875,7 +867,8 @@ namespace vk
 cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format,
 rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override
 {
-auto section = create_new_texture(cmd, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, context, type,
+const utils::address_range rsx_range = utils::address_range::start_length(rsx_address, pitch * height);
+auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, gcm_format, context, type,
 rsx::texture_create_flags::default_component_order);

 auto image = section->get_raw_texture();
@ -962,6 +955,7 @@ namespace vk
 }

 public:
+using baseclass::texture_cache;

 void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap)
 {
@ -974,26 +968,24 @@ namespace vk

 void destroy() override
 {
-purge_cache();
+clear();
 }

 bool is_depth_texture(u32 rsx_address, u32 rsx_size) override
 {
 reader_lock lock(m_cache_mutex);

-auto found = m_cache.find(get_block_address(rsx_address));
-if (found == m_cache.end())
+auto &block = m_storage.block_for(rsx_address);
+
+if (block.get_locked_count() == 0)
 return false;

-//if (found->second.valid_count == 0)
-//return false;
-
-for (auto& tex : found->second.data)
+for (auto& tex : block)
 {
 if (tex.is_dirty())
 continue;

-if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
+if (!tex.overlaps(rsx_address, rsx::section_bounds::full_range))
 continue;

 if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())
@ -1016,10 +1008,10 @@ namespace vk

 void on_frame_end() override
 {
-if (m_unreleased_texture_objects >= m_max_zombie_objects ||
+if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects ||
 m_discarded_memory_size > 0x4000000) //If already holding over 64M in discardable memory, be frugal with memory resources
 {
-purge_dirty();
+purge_unreleased_sections();
 }

 const u64 last_complete_frame = vk::get_last_completed_frame_id();
@ -1228,7 +1220,7 @@ namespace vk
 {
 if (reply.real_dst_size)
 {
-flush_if_cache_miss_likely(helper.format, reply.real_dst_address, reply.real_dst_size, cmd, m_submit_queue);
+flush_if_cache_miss_likely(helper.format, reply.to_address_range(), cmd, m_submit_queue);
 }

 return true;
@ -1239,12 +1231,12 @@ namespace vk

 const u32 get_unreleased_textures_count() const override
 {
-return m_unreleased_texture_objects + (u32)m_discardable_storage.size();
+return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size();
 }

 const u32 get_texture_memory_in_use() const override
 {
-return m_texture_memory_in_use;
+return m_storage.m_texture_memory_in_use;
 }

 const u32 get_temporary_memory_in_use()
@ -1,4 +1,4 @@
 #pragma once
 #include "Utilities/VirtualMemory.h"
 #include "Utilities/hash.h"
 #include "Emu/Memory/vm.h"

@ -6,6 +6,7 @@
 #include "Common/ProgramStateCache.h"
 #include "Emu/Cell/Modules/cellMsgDialog.h"
 #include "Emu/System.h"
+#include "Common/texture_cache_checker.h"

 #include "rsx_utils.h"
 #include <thread>
@ -19,109 +20,125 @@ namespace rsx
 protect_policy_full_range //Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding
 };

-enum overlap_test_bounds
+enum section_bounds
 {
 full_range,
-protected_range,
+locked_range,
 confirmed_range
 };

+static inline void memory_protect(const address_range& range, utils::protection prot)
+{
+verify(HERE), range.is_page_range();
+
+//LOG_ERROR(RSX, "memory_protect(0x%x, 0x%x, %x)", static_cast<u32>(range.start), static_cast<u32>(range.length()), static_cast<u32>(prot));
+utils::memory_protect(vm::base(range.start), range.length(), prot);
+
+#ifdef TEXTURE_CACHE_DEBUG
+tex_cache_checker.set_protection(range, prot);
+#endif
+}
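memory_protect() insists on a page-aligned range because OS-level protection (mprotect/VirtualProtect) only operates at page granularity. A hedged sketch of the alignment helpers it relies on, assuming 4096-byte pages as elsewhere in this diff:

#include <cassert>
#include <cstdint>

constexpr uint32_t page_size = 4096;

constexpr uint32_t page_start(uint32_t addr) { return addr & ~(page_size - 1); }         // round down
constexpr uint32_t page_end(uint32_t addr) { return page_start(addr) + page_size - 1; }  // inclusive end

int main()
{
    // A range is protectable only if it starts on a page boundary and ends
    // one byte before the next boundary.
    const uint32_t start = page_start(0xC0001234u); // 0xC0001000
    const uint32_t end = page_end(0xC0001234u);     // 0xC0001FFF
    assert(start % page_size == 0 && (end + 1) % page_size == 0);
}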
 class buffered_section
 {
+public:
+static const protection_policy guard_policy = protect_policy_full_range;
+
 private:
-u32 locked_address_base = 0;
-u32 locked_address_range = 0;
-weak_ptr locked_memory_ptr;
-std::pair<u32, u32> confirmed_range;
+address_range locked_range;
+address_range cpu_range = {};
+address_range confirmed_range;
+weak_ptr super_ptr;

-inline void tag_memory()
-{
-if (locked_memory_ptr)
-{
-const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
-u32* first = locked_memory_ptr.get<u32>(confirmed_range.first, true);
-u32* last = locked_memory_ptr.get<u32>(valid_limit - 4, true);
-
-*first = cpu_address_base + confirmed_range.first;
-*last = cpu_address_base + valid_limit - 4;
-
-locked_memory_ptr.flush(confirmed_range.first, 4);
-locked_memory_ptr.flush(valid_limit - 4, 4);
-}
-}
-
-protected:
-u32 cpu_address_base = 0;
-u32 cpu_address_range = 0;
-
 utils::protection protection = utils::protection::rw;
-protection_policy guard_policy;

 bool locked = false;
-bool dirty = false;

-inline void init_lockable_range(u32 base, u32 length)
+inline void init_lockable_range(const address_range &range)
 {
-locked_address_base = (base & ~4095);
+locked_range = range.to_page_range();

-if ((guard_policy != protect_policy_full_range) && (length >= 4096))
+if ((guard_policy != protect_policy_full_range) && (range.length() >= 4096))
 {
-const u32 limit = base + length;
-const u32 block_end = (limit & ~4095);
-const u32 block_start = (locked_address_base < base) ? (locked_address_base + 4096) : locked_address_base;
-
-locked_address_range = 4096;
+const u32 block_start = (locked_range.start < range.start) ? (locked_range.start + 4096u) : locked_range.start;
+const u32 block_end = locked_range.end;

 if (block_start < block_end)
 {
-//Page boundaries cover at least one unique page
-locked_address_base = block_start;
+// protect unique page range
+locked_range.start = block_start;
+locked_range.end = block_end;
+}

-if (guard_policy == protect_policy_conservative)
+if (guard_policy == protect_policy_one_page)
 {
-//Protect full unique range
-locked_address_range = (block_end - block_start);
+// protect exactly one page
+locked_range.set_length(4096u);
 }
 }
-}
-else
-locked_address_range = align(base + length, 4096) - locked_address_base;

-verify(HERE), locked_address_range > 0;
+AUDIT( (locked_range.start == page_start(range.start)) || (locked_range.start == next_page(range.start)) );
+AUDIT( locked_range.end <= page_end(range.end) );
+verify(HERE), locked_range.is_page_range();
 }
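For the non-full-range guard policies, init_lockable_range() keeps only pages that the section covers entirely, since a partially covered page may be shared with an unrelated object. A worked example of that arithmetic (illustrative values, 4096-byte pages):

#include <cassert>
#include <cstdint>

int main()
{
    // Section [0x1800, 0x2FFF]: page 0x1000 is only half covered and may be
    // shared, so the first fully owned (lockable) page starts at 0x2000.
    const uint32_t start = 0x1800, length = 0x1800;          // ends at 0x2FFF
    const uint32_t page_range_start = start & ~4095u;        // 0x1000
    const uint32_t block_start = (page_range_start < start)
        ? page_range_start + 4096u : page_range_start;       // 0x2000
    const uint32_t block_end = (start + length - 1) | 4095u; // 0x2FFF (inclusive)
    assert(block_start == 0x2000u && block_start < block_end);
}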
 public:

-buffered_section() {}
-~buffered_section() {}
+buffered_section() {};
+~buffered_section() {};

-void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range)
+void reset(const address_range &memory_range)
 {
-verify(HERE), locked == false;
+verify(HERE), memory_range.valid() && locked == false;

-cpu_address_base = base;
-cpu_address_range = length;
+cpu_range = address_range(memory_range);
+confirmed_range.invalidate();
+locked_range.invalidate();

-confirmed_range = { 0, 0 };
 protection = utils::protection::rw;
-guard_policy = protect_policy;
 locked = false;

-init_lockable_range(cpu_address_base, cpu_address_range);
+super_ptr = {};
+
+init_lockable_range(cpu_range);
 }

-void protect(utils::protection prot, bool force = false)
+protected:
+void invalidate_range()
 {
-if (prot == protection && !force) return;
+ASSERT(!locked);

-verify(HERE), locked_address_range > 0;
-utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
-protection = prot;
-locked = prot != utils::protection::rw;
+cpu_range.invalidate();
+confirmed_range.invalidate();
+locked_range.invalidate();
+}

-if (prot == utils::protection::no)
+public:
+void protect(utils::protection new_prot, bool force = false)
 {
-locked_memory_ptr = rsx::get_super_ptr(cpu_address_base, cpu_address_range);
+if (new_prot == protection && !force) return;

+verify(HERE), locked_range.is_page_range();
+AUDIT( !confirmed_range.valid() || confirmed_range.inside(cpu_range) );
+
+#ifdef TEXTURE_CACHE_DEBUG
+if (new_prot != protection || force)
+{
+if (locked && !force) // When force=true, it is the responsibility of the caller to remove this section from the checker refcounting
+tex_cache_checker.remove(locked_range, protection);
+if (new_prot != utils::protection::rw)
+tex_cache_checker.add(locked_range, new_prot);
+}
+#endif // TEXTURE_CACHE_DEBUG
+
+rsx::memory_protect(locked_range, new_prot);
+protection = new_prot;
+locked = (protection != utils::protection::rw);
+
+if (protection == utils::protection::no)
+{
+super_ptr = rsx::get_super_ptr(cpu_range);
+verify(HERE), super_ptr;
 tag_memory();
 }
 else
@ -129,255 +146,274 @@ namespace rsx
 if (!locked)
 {
 //Unprotect range also invalidates secured range
-confirmed_range = { 0, 0 };
+confirmed_range.invalidate();
 }

-locked_memory_ptr = {};
-}
+super_ptr = {};
 }

-void protect(utils::protection prot, const std::pair<u32, u32>& range_confirm)
+}
+
+void protect(utils::protection prot, const std::pair<u32, u32>& new_confirm)
 {
+// new_confirm.first is an offset after cpu_range.start
+// new_confirm.second is the length (after cpu_range.start + new_confirm.first)
+
+#ifdef TEXTURE_CACHE_DEBUG
+// We need to remove the lockable range from page_info as we will be re-protecting with force==true
+if (locked)
+tex_cache_checker.remove(locked_range, protection);
+#endif
+
 if (prot != utils::protection::rw)
 {
-const auto old_prot = protection;
-const auto old_locked_base = locked_address_base;
-const auto old_locked_length = locked_address_range;
-
-if (confirmed_range.second)
+if (confirmed_range.valid())
 {
-const u32 range_limit = std::max(range_confirm.first + range_confirm.second, confirmed_range.first + confirmed_range.second);
-confirmed_range.first = std::min(confirmed_range.first, range_confirm.first);
-confirmed_range.second = range_limit - confirmed_range.first;
+confirmed_range.start = std::min(confirmed_range.start, cpu_range.start + new_confirm.first);
+confirmed_range.end = std::max(confirmed_range.end, cpu_range.start + new_confirm.first + new_confirm.second - 1);
 }
 else
 {
-confirmed_range = range_confirm;
+confirmed_range = address_range::start_length(cpu_range.start + new_confirm.first, new_confirm.second);
+ASSERT(!locked || locked_range.inside(confirmed_range.to_page_range()));
 }

-init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second);
+verify(HERE), confirmed_range.inside(cpu_range);
+init_lockable_range(confirmed_range);
 }

 protect(prot, true);
 }

-void unprotect()
+inline void unprotect()
 {
+AUDIT(protection != utils::protection::rw);
 protect(utils::protection::rw);
 }

-void discard()
+inline void discard()
 {
+#ifdef TEXTURE_CACHE_DEBUG
+if (locked)
+tex_cache_checker.remove(locked_range, protection);
+#endif
+
 protection = utils::protection::rw;
-dirty = true;
+confirmed_range.invalidate();
+super_ptr = {};
 locked = false;
+}

-confirmed_range = { 0, 0 };
-locked_memory_ptr = {};
+inline const address_range& get_bounds(section_bounds bounds) const
+{
+switch (bounds)
+{
+case section_bounds::full_range:
+return cpu_range;
+case section_bounds::locked_range:
+return locked_range;
+case section_bounds::confirmed_range:
+return confirmed_range.valid() ? confirmed_range : cpu_range;
+default:
+ASSUME(0);
+}
+}
+
+/**
+* Overlapping checks
+*/
+inline bool overlaps(const u32 address, section_bounds bounds) const
+{
+return get_bounds(bounds).overlaps(address);
+}
+
+inline bool overlaps(const address_range &other, section_bounds bounds) const
+{
+return get_bounds(bounds).overlaps(other);
+}
+
+inline bool overlaps(const buffered_section &other, section_bounds bounds) const
+{
+return get_bounds(bounds).overlaps(other.get_bounds(bounds));
+}
+
+inline bool inside(const address_range &other, section_bounds bounds) const
+{
+return get_bounds(bounds).inside(other);
+}
+
+inline bool inside(const buffered_section &other, section_bounds bounds) const
+{
+return get_bounds(bounds).inside(other.get_bounds(bounds));
+}
+
+inline s32 signed_distance(const address_range &other, section_bounds bounds) const
+{
+return get_bounds(bounds).signed_distance(other);
+}
+
+inline u32 distance(const address_range &other, section_bounds bounds) const
+{
+return get_bounds(bounds).distance(other);
 }
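Every overlap predicate above funnels through get_bounds(), so the same test can be run against the full, locked, or confirmed extent of a section. A simplified sketch of that dispatch shape (types reduced for illustration):

#include <cstdint>

struct range { uint32_t start, end; };               // inclusive bounds
enum bounds_kind { full, locked, confirmed };

struct section_sketch
{
    range full_r, locked_r, confirmed_r;

    const range& bounds(bounds_kind k) const
    {
        switch (k)
        {
        case full: return full_r;
        case locked: return locked_r;
        default: return confirmed_r;
        }
    }

    // One predicate serves every bounds flavour.
    bool overlaps(const range& other, bounds_kind k) const
    {
        const range& r = bounds(k);
        return r.start <= other.end && other.start <= r.end;
    }
};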
 /**
-* Check if range overlaps with this section.
-* ignore_protection_range - if true, the test should not check against the aligned protection range, instead
-* tests against actual range of contents in memory
+* Utilities
 */
-bool overlaps(std::pair<u32, u32> range) const
+inline bool valid_range() const
 {
-return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
+return cpu_range.valid();
 }

-bool overlaps(u32 address, overlap_test_bounds bounds) const
-{
-switch (bounds)
-{
-case overlap_test_bounds::full_range:
-{
-return (cpu_address_base <= address && (address - cpu_address_base) < cpu_address_range);
-}
-case overlap_test_bounds::protected_range:
-{
-return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
-}
-case overlap_test_bounds::confirmed_range:
-{
-const auto range = get_confirmed_range();
-return ((range.first + cpu_address_base) <= address && (address - range.first) < range.second);
-}
-default:
-fmt::throw_exception("Unreachable" HERE);
-}
-}
-
-bool overlaps(const std::pair<u32, u32>& range, overlap_test_bounds bounds) const
-{
-switch (bounds)
-{
-case overlap_test_bounds::full_range:
-{
-return region_overlaps(cpu_address_base, cpu_address_base + cpu_address_range, range.first, range.first + range.second);
-}
-case overlap_test_bounds::protected_range:
-{
-return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
-}
-case overlap_test_bounds::confirmed_range:
-{
-const auto test_range = get_confirmed_range();
-return region_overlaps(test_range.first + cpu_address_base, test_range.first + cpu_address_base + test_range.second, range.first, range.first + range.second);
-}
-default:
-fmt::throw_exception("Unreachable" HERE);
-}
-}
-
-/**
-* Check if the page containing the address tramples this section. Also compares a former trampled page range to compare
-* If true, returns the range <min, max> with updated invalid range
-*/
-std::tuple<bool, std::pair<u32, u32>> overlaps_page(const std::pair<u32, u32>& old_range, u32 address, overlap_test_bounds bounds) const
-{
-const u32 page_base = address & ~4095;
-const u32 page_limit = page_base + 4096;
-
-const u32 compare_min = std::min(old_range.first, page_base);
-const u32 compare_max = std::max(old_range.second, page_limit);
-
-u32 memory_base, memory_range;
-switch (bounds)
-{
-case overlap_test_bounds::full_range:
-{
-memory_base = (cpu_address_base & ~4095);
-memory_range = align(cpu_address_base + cpu_address_range, 4096u) - memory_base;
-break;
-}
-case overlap_test_bounds::protected_range:
-{
-memory_base = locked_address_base;
-memory_range = locked_address_range;
-break;
-}
-case overlap_test_bounds::confirmed_range:
-{
-const auto range = get_confirmed_range();
-memory_base = (cpu_address_base + range.first) & ~4095;
-memory_range = align(cpu_address_base + range.first + range.second, 4096u) - memory_base;
-break;
-}
-default:
-fmt::throw_exception("Unreachable" HERE);
-}
-
-if (!region_overlaps(memory_base, memory_base + memory_range, compare_min, compare_max))
-return std::make_tuple(false, old_range);
-
-const u32 _min = std::min(memory_base, compare_min);
-const u32 _max = std::max(memory_base + memory_range, compare_max);
-return std::make_tuple(true, std::make_pair(_min, _max));
-}
-
-bool is_locked() const
+inline bool is_locked() const
 {
 return locked;
 }
-bool is_dirty() const
+inline u32 get_section_base() const
 {
-return dirty;
+return cpu_range.start;
 }

-void set_dirty(bool state)
+inline u32 get_section_size() const
 {
-dirty = state;
+return cpu_range.valid() ? cpu_range.length() : 0;
 }

-u32 get_section_base() const
+inline const address_range& get_locked_range() const
 {
-return cpu_address_base;
+AUDIT( locked );
+return locked_range;
 }

-u32 get_section_size() const
+inline const address_range& get_section_range() const
 {
-return cpu_address_range;
+return cpu_range;
 }

-bool matches(u32 cpu_address, u32 size) const
+const address_range& get_confirmed_range() const
 {
-return (cpu_address_base == cpu_address && cpu_address_range == size);
+return confirmed_range.valid() ? confirmed_range : cpu_range;
 }

-std::pair<u32, u32> get_min_max(const std::pair<u32, u32>& current_min_max) const
+const std::pair<u32, u32> get_confirmed_range_delta() const
 {
-u32 min = std::min(current_min_max.first, locked_address_base);
-u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
+if (!confirmed_range.valid())
+return { 0, cpu_range.length() };

-return std::make_pair(min, max);
+return { confirmed_range.start - cpu_range.start, confirmed_range.length() };
 }
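get_confirmed_range_delta() reports the confirmed sub-range as an (offset, length) pair relative to the section base, which is exactly the shape the Vulkan flush path feeds to dma_buffer->map() and get_ptr_by_offset(). A small worked example with illustrative numbers:

#include <cassert>
#include <cstdint>
#include <utility>

int main()
{
    // Section spans [0x1000, 0x1FFF]; writes were confirmed in [0x1200, 0x15FF].
    const uint32_t base = 0x1000;
    const uint32_t confirmed_start = 0x1200, confirmed_end = 0x15FF;

    const std::pair<uint32_t, uint32_t> delta{
        confirmed_start - base,                // offset into the section: 0x200
        confirmed_end - confirmed_start + 1 }; // length: 0x400

    assert(delta.first == 0x200u && delta.second == 0x400u);
}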
-utils::protection get_protection() const
+inline bool matches(const address_range &range) const
+{
+return cpu_range.valid() && cpu_range == range;
+}
+
+inline utils::protection get_protection() const
 {
 return protection;
 }

-template <typename T = void>
-T* get_raw_ptr(u32 offset = 0, bool no_sync = false)
+inline address_range get_min_max(const address_range& current_min_max, section_bounds bounds) const
 {
-verify(HERE), locked_memory_ptr;
-return locked_memory_ptr.get<T>(offset, no_sync);
+return get_bounds(bounds).get_min_max(current_min_max);
 }

+/**
+* Super Pointer
+*/
+template <typename T = void>
+inline T* get_ptr_by_offset(u32 offset = 0, bool no_sync = false)
+{
+verify(HERE), super_ptr && cpu_range.length() >= (offset + sizeof(T));
+return super_ptr.get<T>(offset, no_sync);
+}
+
+// specialization due to sizeof(void) being illegal
+inline void* get_ptr_by_offset(u32 offset, bool no_sync)
+{
+verify(HERE), super_ptr && cpu_range.length() >= (offset + 1);
+return super_ptr.get<void>(offset, no_sync);
+}
+
+template <typename T = void>
+inline T* get_ptr(u32 address, bool no_sync = false)
+{
+verify(HERE), cpu_range.start <= address; // super_ptr & sizeof(T) tests are done by get_ptr_by_offset
+return get_ptr_by_offset<T>(address - cpu_range.start, no_sync);
+}
+
+inline void flush_ptr_by_offset(u32 offset = 0, u32 len = 0) const
+{
+verify(HERE), super_ptr && cpu_range.length() >= (offset + len);
+super_ptr.flush(offset, len);
+}
+
+inline void flush_ptr(u32 address, u32 len = 0) const
+{
+verify(HERE), cpu_range.start <= address; // super_ptr & length tests are done by flush_ptr_by_offset
+return flush_ptr_by_offset(address - cpu_range.start, len);
+}
+
+inline void flush_ptr(const address_range &range) const
+{
+return flush_ptr(range.start, range.length());
+}
+
+/**
+* Memory tagging
+*/
+private:
+inline void tag_memory()
+{
+// We only need to tag memory if we are in full-range mode
+if (guard_policy == protect_policy_full_range)
+return;
+
+AUDIT(locked && super_ptr);
+
+const address_range& range = get_confirmed_range();
+
+volatile u32* first = get_ptr<volatile u32>(range.start, true);
+volatile u32* last = get_ptr<volatile u32>(range.end - 3, true);
+
+*first = range.start;
+*last = range.end;
+
+flush_ptr(range.start, 4);
+flush_ptr(range.end - 3, 4);
+}
+
+public:
 bool test_memory_head()
 {
-if (!locked_memory_ptr)
-{
-return false;
-}
+if (guard_policy == protect_policy_full_range)
+return true;

-const u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
-return (*first == (cpu_address_base + confirmed_range.first));
+AUDIT(locked && super_ptr);
+
+const auto& range = get_confirmed_range();
+volatile const u32* first = get_ptr<volatile const u32>(range.start);
+return (*first == range.start);
 }

 bool test_memory_tail()
 {
-if (!locked_memory_ptr)
-{
-return false;
-}
+if (guard_policy == protect_policy_full_range)
+return true;

-const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range;
-const u32* last = locked_memory_ptr.get<u32>(valid_limit - 4);
-return (*last == (cpu_address_base + valid_limit - 4));
-}
-
-void flush_io(u32 offset = 0, u32 len = 0) const
-{
-const auto write_length = len ? len : (cpu_address_range - offset);
-locked_memory_ptr.flush(offset, write_length);
-}
-
-std::pair<u32, u32> get_protected_range() const
-{
-if (locked)
-{
-return { locked_address_base, locked_address_range };
-}
-else
-{
-return { 0, 0 };
-}
-}
+AUDIT(locked && super_ptr);

-std::pair<u32, u32> get_confirmed_range() const
-{
-if (confirmed_range.second == 0)
-{
-return { 0, cpu_address_range };
-}
-
-return confirmed_range;
+const auto& range = get_confirmed_range();
+volatile const u32* last = get_ptr<volatile const u32>(range.end-3);
+return (*last == range.end);
 }
 };
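The tagging scheme above writes a section's own start and end addresses into its first and last 32-bit words, so test_memory_head/test_memory_tail can later detect whether guest code wrote over a range that was only partially protected. A self-contained sketch of the idea:

#include <cassert>
#include <cstddef>
#include <cstdint>

static void tag(uint32_t* buf, size_t words, uint32_t start, uint32_t end)
{
    buf[0] = start;       // head tag
    buf[words - 1] = end; // tail tag
}

static bool still_tagged(const uint32_t* buf, size_t words, uint32_t start, uint32_t end)
{
    return buf[0] == start && buf[words - 1] == end;
}

int main()
{
    uint32_t backing[16] = {};
    tag(backing, 16, 0xC0000000u, 0xC000003Fu);
    assert(still_tagged(backing, 16, 0xC0000000u, 0xC000003Fu));

    backing[0] = 0xDEADBEEF; // simulate a guest write over the head word
    assert(!still_tagged(backing, 16, 0xC0000000u, 0xC000003Fu));
}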

 template <typename pipeline_storage_type, typename backend_storage>
 class shaders_cache
 {
@ -76,6 +76,11 @@ namespace rsx
 }
 }

+weak_ptr get_super_ptr(const address_range &range)
+{
+return get_super_ptr(range.start, range.length());
+}
+
 weak_ptr get_super_ptr(u32 addr, u32 len)
 {
 verify(HERE), g_current_renderer;
@ -507,4 +512,8 @@ namespace rsx
 ++src_ptr;
 }
 }

+#ifdef TEXTURE_CACHE_DEBUG
+tex_cache_checker_t tex_cache_checker = {};
+#endif
 }
@ -1,8 +1,11 @@
 #pragma once

 #include "../System.h"
+#include "Utilities/address_range.h"
 #include "Utilities/geometry.h"
 #include "Utilities/asm.h"
+#include "Utilities/VirtualMemory.h"
+#include "Emu/Memory/vm.h"
 #include "gcm_enums.h"
 #include <atomic>
 #include <memory>
@ -16,6 +19,15 @@ extern "C"

 namespace rsx
 {
+// Import address_range utilities
+using utils::address_range;
+using utils::address_range_vector;
+using utils::page_for;
+using utils::page_start;
+using utils::page_end;
+using utils::next_page;
+
+// Definitions
 class thread;
 extern thread* g_current_renderer;
@ -200,7 +212,14 @@ namespace rsx
 }
 };

-//Holds information about a framebuffer
+// Acquire memory mirror with r/w permissions
+weak_ptr get_super_ptr(const address_range &range);
+weak_ptr get_super_ptr(u32 addr, u32 size);
+
+/**
+* Holds information about a framebuffer
+*/
 struct gcm_framebuffer_info
 {
 u32 address = 0;
@ -223,6 +242,11 @@ namespace rsx
 gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
 :address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
 {}

+address_range get_memory_range(u32 aa_factor = 1) const
+{
+return address_range::start_length(address, pitch * height * aa_factor);
+}
 };

 struct avconf
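get_memory_range() derives a framebuffer's guest-memory footprint from its pitch, height, and antialiasing factor; this is the range the render-target locking code above passes to lock_memory_region. A quick illustration of the arithmetic with made-up values:

#include <cassert>
#include <cstdint>

int main()
{
    // A 720-row surface with a 5120-byte pitch and a 2x AA factor occupies
    // pitch * height * aa_factor bytes from the base address.
    const uint32_t address = 0xC0000000u, pitch = 5120, height = 720, aa_factor = 2;
    const uint32_t length = pitch * height * aa_factor; // 0x708000 bytes
    const uint32_t end = address + length - 1;          // inclusive end
    assert(end == 0xC0707FFFu);
}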
@ -463,9 +487,6 @@ namespace rsx

 std::array<float, 4> get_constant_blend_colors();

-// Acquire memory mirror with r/w permissions
-weak_ptr get_super_ptr(u32 addr, u32 size);
-
 /**
 * Shuffle texel layout from xyzw to wzyx
 * TODO: Variable src/dst and optional se conversion
@ -727,11 +748,6 @@ namespace rsx
 return g_current_renderer;
 }

-static inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
-{
-return (base1 < limit2 && base2 < limit1);
-}
-
 template <int N>
 void unpack_bitset(std::bitset<N>& block, u64* values)
 {
@ -528,6 +528,8 @@
 <ClInclude Include="Emu\RSX\Common\GLSLCommon.h" />
 <ClInclude Include="Emu\RSX\Common\TextGlyphs.h" />
 <ClInclude Include="Emu\RSX\Common\texture_cache.h" />
+<ClInclude Include="Emu\RSX\Common\texture_cache_checker.h" />
+<ClInclude Include="Emu\RSX\Common\texture_cache_utils.h" />
 <ClInclude Include="Emu\RSX\gcm_enums.h" />
 <ClInclude Include="Emu\RSX\gcm_printing.h" />
 <ClInclude Include="Emu\RSX\Overlays\overlays.h" />
@ -1444,5 +1444,11 @@
 <ClInclude Include="..\Utilities\address_range.h">
 <Filter>Utilities</Filter>
 </ClInclude>
+<ClInclude Include="Emu\RSX\Common\texture_cache_checker.h">
+<Filter>Emu\GPU\RSX\Common</Filter>
+</ClInclude>
+<ClInclude Include="Emu\RSX\Common\texture_cache_utils.h">
+<Filter>Emu\GPU\RSX\Common</Filter>
+</ClInclude>
 </ItemGroup>
 </Project>