rsx: Avoid on-the-fly ZCULL allocations with unordered_map

kd-11 2022-09-07 22:18:59 +03:00 committed by kd-11
parent 60fc90bb8e
commit cd53bb7eff
4 changed files with 54 additions and 35 deletions
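
In short, the per-tag std::unordered_map of statistics counters is replaced by a fixed std::array of query_stat_counter slots (a new flags field marks a slot as live), and the statistics tag now advances modulo max_stat_registers, so allocating or releasing a tag no longer touches the heap. The snippet below is a minimal standalone sketch of that scheme, not the actual ZCULL_control code; the names stat_register, stat_ring, mark_in_use, advance and release are illustrative only.

#include <array>
#include <cstdint>
#include <cstdio>

// Minimal sketch of the fixed-register scheme, assuming a slot is "live"
// while its flags field is non-zero and becomes free when flags is cleared.
struct stat_register
{
    uint32_t result = 0;
    uint32_t flags  = 0;
};

class stat_ring
{
    static constexpr uint32_t max_stat_registers = 8192; // Size of the statistics cache
    std::array<stat_register, max_stat_registers> m_registers{};
    uint32_t m_current = 0;

public:
    // Mark the current slot as referenced by a pending report write.
    void mark_in_use() { m_registers[m_current].flags |= 1; }

    uint32_t& current_result() { return m_registers[m_current].result; }

    // Advance to the next slot only if the current one was actually referenced.
    // No heap allocation happens here; the backing array is fixed.
    void advance()
    {
        if (!m_registers[m_current].flags)
        {
            return; // Current slot was never referenced, reuse it.
        }

        m_current = (m_current + 1) % max_stat_registers;
        if (m_registers[m_current].flags)
        {
            // The ring wrapped around onto a live slot; data would be lost.
            std::fprintf(stderr, "Statistics slot %u still in use\n", m_current);
        }
        m_registers[m_current].result = 0; // Clear value before use
    }

    // Called when the pending write that referenced slot 'tag' retires.
    void release(uint32_t tag) { m_registers[tag].flags = 0; }
};

Compared with the old map, tag lookups become plain array indexing and the hot path performs no allocation; the cost is a fixed backing store (8192 slots of 8 bytes, roughly 64 KiB) and the requirement that slots are released before the ring wraps back onto them.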

View file

@@ -308,7 +308,7 @@ void GLGSRender::on_init_thread()
     }
     //Occlusion query
-    for (u32 i = 0; i < occlusion_query_count; ++i)
+    for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
     {
         GLuint handle = 0;
         auto &query = m_occlusion_query_data[i];
@@ -484,7 +484,7 @@ void GLGSRender::on_exit()
     m_shader_interpreter.destroy();

-    for (u32 i = 0; i < occlusion_query_count; ++i)
+    for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
     {
         auto &query = m_occlusion_query_data[i];
         query.active = false;

View file

@@ -11,6 +11,11 @@ namespace rsx
         {
             m_free_occlusion_pool.push(&query);
         }
+
+        for (auto& stat : m_statistics_map)
+        {
+            stat.flags = stat.result = 0;
+        }
     }

     ZCULL_control::~ZCULL_control()
@@ -157,6 +162,8 @@ namespace rsx
         }

         auto forwarder = &m_pending_writes.back();
+        m_statistics_map[m_statistics_tag_id].flags |= 1;
+
         for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); It++)
         {
             if (!It->sink)
@@ -272,8 +279,26 @@ namespace rsx
             m_pending_writes.resize(valid_size);
         }

-        m_statistics_tag_id++;
-        m_statistics_map[m_statistics_tag_id] = {};
+        if (m_pending_writes.empty())
+        {
+            // Clear can be invoked from flip as a workaround to prevent query leakage.
+            m_statistics_map[m_statistics_tag_id].flags = 0;
+        }
+
+        if (m_statistics_map[m_statistics_tag_id].flags)
+        {
+            m_statistics_tag_id = (m_statistics_tag_id + 1) % max_stat_registers;
+
+            auto data = m_statistics_map.data() + m_statistics_tag_id;
+            if (data->flags != 0)
+            {
+                // This shouldn't happen
+                rsx_log.error("Allocating a new ZCULL statistics slot %u overwrites previous data.", m_statistics_tag_id);
+            }
+
+            // Clear value before use
+            data->result = 0;
+        }
     }

     void ZCULL_control::on_draw()
@@ -463,12 +488,16 @@ namespace rsx
         }

         // Delete all statistics caches but leave the current one
-        for (auto It = m_statistics_map.begin(); It != m_statistics_map.end(); )
+        const u32 current_index = m_statistics_tag_id;
+        for (u32 index = current_index - 1; index != current_index;)
         {
-            if (It->first == m_statistics_tag_id)
-                ++It;
-            else
-                It = m_statistics_map.erase(It);
+            if (m_statistics_map[index].flags == 0)
+            {
+                break;
+            }
+
+            m_statistics_map[index].flags = 0;
+            index = (index + max_stat_registers - 1) % max_stat_registers;
         }
         //Decrement jobs counter
@@ -534,22 +563,12 @@ namespace rsx
             }
         }

-        u32 stat_tag_to_remove = m_statistics_tag_id;
         u32 processed = 0;

        for (auto& writer : m_pending_writes)
         {
             if (!writer.sink)
                 break;

-            if (writer.counter_tag != stat_tag_to_remove &&
-                stat_tag_to_remove != m_statistics_tag_id)
-            {
-                //If the stat id is different from this stat id and the queue is advancing,
-                //its guaranteed that the previous tag has no remaining writes as the queue is ordered
-                m_statistics_map.erase(stat_tag_to_remove);
-                stat_tag_to_remove = m_statistics_tag_id;
-            }
-
             auto query = writer.query;
             auto& counter = m_statistics_map[writer.counter_tag];
@@ -586,15 +605,13 @@ namespace rsx
                 free_query(query);
             }

-            stat_tag_to_remove = writer.counter_tag;
+            // Release the stat tag for this object. Slots are all or nothing.
+            m_statistics_map[writer.counter_tag].flags = 0;

             retire(ptimer, &writer, counter.result);
             processed++;
         }

-        if (stat_tag_to_remove != m_statistics_tag_id)
-            m_statistics_map.erase(stat_tag_to_remove);
-
         if (processed)
         {
             auto remaining = m_pending_writes.size() - processed;

View file

@@ -62,7 +62,7 @@ namespace rsx
        struct query_stat_counter
        {
            u32 result;
-           u32 reserved;
+           u32 flags;
        };

        struct sync_hint_payload_t
@@ -84,6 +84,15 @@ namespace rsx
            sync_no_notify = 2   // If set, backend hint notifications will not be made
        };

+       enum constants
+       {
+           max_zcull_delay_us = 300,     // Delay before a report update operation is forced to retire
+           min_zcull_tick_us = 100,      // Default tick duration. To avoid hardware spam, we schedule peeks in multiples of this.
+           occlusion_query_count = 2048, // Number of occlusion query slots available. Real hardware actually has far fewer units before choking
+           max_safe_queue_depth = 1792,  // Number of in-flight queries before we start forcefully flushing data from the GPU device.
+           max_stat_registers = 8192     // Size of the statistics cache
+       };
+
        class ZCULL_control
        {
        private:
@@ -97,13 +106,6 @@ namespace rsx
            void disable_optimizations(class ::rsx::thread* ptimer, u32 location);

        protected:
-           // Delay before a report update operation is forced to retire
-           const u32 max_zcull_delay_us = 300;
-           const u32 min_zcull_tick_us = 100;
-
-           // Number of occlusion query slots available. Real hardware actually has far fewer units before choking
-           const u32 occlusion_query_count = 2048;
-           const u32 max_safe_queue_depth = 1792;

            bool unit_enabled = false;  // The ZCULL unit is on
            bool write_enabled = false; // A surface in the ZCULL-monitored tile region has been loaded for rasterization
@@ -126,7 +128,7 @@ namespace rsx
            u64 m_timer = 0;

            std::vector<queued_report_write> m_pending_writes{};
-           std::unordered_map<u32, query_stat_counter> m_statistics_map{};
+           std::array<query_stat_counter, max_stat_registers> m_statistics_map{};

            // Enables/disables the ZCULL unit
            void set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue);
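
Hoisting these limits out of the class is more than cleanup: a per-instance const u32 member is not a constant expression, so it could not size the std::array member above, and occlusion_query_count was previously unreachable from the GL/Vulkan backends, which now spell it rsx::reports::occlusion_query_count. A rough illustration of the sizing point, using simplified, hypothetical names rather than the real header:

#include <array>
#include <cstdint>

using u32 = std::uint32_t;

struct query_stat_counter { u32 result; u32 flags; };

// A per-instance 'const u32' member is not a constant expression, so it
// cannot size a std::array data member:
//
//     const u32 max_stat_registers = 8192;
//     std::array<query_stat_counter, max_stat_registers> m_statistics_map{}; // ill-formed
//
// An enumerator is an integral constant expression, so the array type works:
enum constants { max_stat_registers = 8192 };

struct stats_holder_example
{
    std::array<query_stat_counter, max_stat_registers> m_statistics_map{};
};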

View file

@@ -420,9 +420,9 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
    //Occlusion
    m_occlusion_query_manager = std::make_unique<vk::query_pool_manager>(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE);
-   m_occlusion_map.resize(occlusion_query_count);
+   m_occlusion_map.resize(rsx::reports::occlusion_query_count);

-   for (u32 n = 0; n < occlusion_query_count; ++n)
+   for (u32 n = 0; n < rsx::reports::occlusion_query_count; ++n)
        m_occlusion_query_data[n].driver_handle = n;

    if (g_cfg.video.precise_zpass_count)