zcull synchronization tweaks

- Implement forced reading when calling the update method, to sync partial lists
- Defer conditional render evaluation and use a read barrier to avoid extra work
- Fix the HLE gcm library when binding tiles and zcull RAM
This commit is contained in:
kd-11 2018-07-19 19:57:01 +03:00 committed by kd-11
parent 3b47e43380
commit 8800c10476
6 changed files with 116 additions and 33 deletions

View file

@ -698,6 +698,7 @@ void cellGcmSetZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart,
zcull.sFunc = sFunc; zcull.sFunc = sFunc;
zcull.sRef = sRef; zcull.sRef = sRef;
zcull.sMask = sMask; zcull.sMask = sMask;
zcull.binded = (zCullFormat > 0);
vm::_ptr<CellGcmZcullInfo>(m_config->zculls_addr)[index] = zcull.pack(); vm::_ptr<CellGcmZcullInfo>(m_config->zculls_addr)[index] = zcull.pack();
} }
@ -1261,6 +1262,7 @@ s32 cellGcmSetTile(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u8 co
tile.comp = comp; tile.comp = comp;
tile.base = base; tile.base = base;
tile.bank = bank; tile.bank = bank;
tile.binded = (pitch > 0);
vm::_ptr<CellGcmTileInfo>(m_config->tiles_addr)[index] = tile.pack(); vm::_ptr<CellGcmTileInfo>(m_config->tiles_addr)[index] = tile.pack();
return CELL_OK; return CELL_OK;

View file

@ -275,6 +275,15 @@ namespace rsx
void thread::begin() void thread::begin()
{ {
if (conditional_render_enabled && conditional_render_test_address)
{
// Evaluate conditional rendering test
zcull_ctrl->read_barrier(this, conditional_render_test_address, 4);
vm::ptr<CellGcmReportData> result = vm::cast(conditional_render_test_address);
conditional_render_test_failed = (result->value == 0);
conditional_render_test_address = 0;
}
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0); rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
in_begin_end = true; in_begin_end = true;
@ -2719,7 +2728,8 @@ namespace rsx
{ {
verify(HERE), query->pending; verify(HERE), query->pending;
if (!result && query->num_draws) const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3);
if (implemented && !result && query->num_draws)
{ {
get_occlusion_query_result(query); get_occlusion_query_result(query);
@ -2784,13 +2794,23 @@ namespace rsx
m_cycles_delay = min_zcull_cycles_delay; m_cycles_delay = min_zcull_cycles_delay;
} }
void ZCULL_control::update(::rsx::thread* ptimer) void ZCULL_control::update(::rsx::thread* ptimer, u32 sync_address)
{ {
m_tsc++; m_tsc++;
if (m_pending_writes.empty()) if (m_pending_writes.empty())
return; return;
if (!sync_address)
{
const auto& front = m_pending_writes.front();
if (!front.sink || m_tsc < front.due_tsc)
{
// Avoid spamming backend with report status updates
return;
}
}
u32 stat_tag_to_remove = m_statistics_tag_id; u32 stat_tag_to_remove = m_statistics_tag_id;
u32 processed = 0; u32 processed = 0;
for (auto &writer : m_pending_writes) for (auto &writer : m_pending_writes)
@ -2810,13 +2830,21 @@ namespace rsx
auto query = writer.query; auto query = writer.query;
u32 result = m_statistics_map[writer.counter_tag]; u32 result = m_statistics_map[writer.counter_tag];
const bool force_read = (sync_address != 0);
if (force_read && writer.sink == sync_address)
{
// Forced reads end here
sync_address = 0;
}
if (query) if (query)
{ {
verify(HERE), query->pending; verify(HERE), query->pending;
if (UNLIKELY(writer.due_tsc < m_tsc)) const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3);
if (force_read || writer.due_tsc < m_tsc)
{ {
if (!result && query->num_draws) if (implemented && !result && query->num_draws)
{ {
get_occlusion_query_result(query); get_occlusion_query_result(query);
@ -2834,12 +2862,7 @@ namespace rsx
} }
else else
{ {
if (result || !query->num_draws) if (implemented && !result && query->num_draws)
{
//Not necessary to read the result anymore
discard_occlusion_query(query);
}
else
{ {
//Maybe we get lucky and results are ready //Maybe we get lucky and results are ready
if (check_occlusion_query_status(query)) if (check_occlusion_query_status(query))
@ -2857,6 +2880,11 @@ namespace rsx
break; break;
} }
} }
else
{
//Not necessary to read the result anymore
discard_occlusion_query(query);
}
} }
query->pending = false; query->pending = false;
@ -2903,13 +2931,19 @@ namespace rsx
return; return;
const auto memory_end = memory_address + memory_range; const auto memory_end = memory_address + memory_range;
u32 sync_address = 0;
for (const auto &writer : m_pending_writes) for (const auto &writer : m_pending_writes)
{ {
if (writer.sink >= memory_address && writer.sink < memory_end) if (writer.sink >= memory_address && writer.sink < memory_end)
{ {
sync(ptimer); sync_address = writer.sink;
return; }
} }
if (sync_address)
{
update(ptimer, sync_address);
} }
} }
} }

View file

@ -118,6 +118,11 @@ namespace rsx
lock_wait = 4 // Puller is processing a lock acquire lock_wait = 4 // Puller is processing a lock acquire
}; };
enum FIFO_hint : u8
{
hint_conditional_render_eval = 1
};
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size); u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
u32 get_address(u32 offset, u32 location); u32 get_address(u32 offset, u32 location);
@ -230,7 +235,7 @@ namespace rsx
queued_report_write* forwarder; queued_report_write* forwarder;
vm::addr_t sink; vm::addr_t sink;
u32 due_tsc; u64 due_tsc;
}; };
struct ZCULL_control struct ZCULL_control
@ -249,7 +254,7 @@ namespace rsx
occlusion_query_info* m_current_task = nullptr; occlusion_query_info* m_current_task = nullptr;
u32 m_statistics_tag_id = 0; u32 m_statistics_tag_id = 0;
u32 m_tsc = 0; u64 m_tsc = 0;
u32 m_cycles_delay = max_zcull_cycles_delay; u32 m_cycles_delay = max_zcull_cycles_delay;
std::vector<queued_report_write> m_pending_writes; std::vector<queued_report_write> m_pending_writes;
@ -278,8 +283,8 @@ namespace rsx
// Conditionally sync any pending writes if range overlaps // Conditionally sync any pending writes if range overlaps
void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range); void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
// Call once every 'tick' to update // Call once every 'tick' to update, optional address provided to partially sync until address is processed
void update(class ::rsx::thread* ptimer); void update(class ::rsx::thread* ptimer, u32 sync_address = 0);
// Draw call notification // Draw call notification
void on_draw(); void on_draw();
@ -433,6 +438,7 @@ namespace rsx
atomic_t<s32> async_tasks_pending{ 0 }; atomic_t<s32> async_tasks_pending{ 0 };
u32 conditional_render_test_address = 0;
bool conditional_render_test_failed = false; bool conditional_render_test_failed = false;
bool conditional_render_enabled = false; bool conditional_render_enabled = false;
bool zcull_stats_enabled = false; bool zcull_stats_enabled = false;
@ -482,6 +488,7 @@ namespace rsx
// sync // sync
void sync(); void sync();
void read_barrier(u32 memory_address, u32 memory_range); void read_barrier(u32 memory_address, u32 memory_range);
virtual void sync_hint(FIFO_hint hint) {}
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const; gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const; gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;

View file

@ -820,16 +820,26 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
command_buffer_chunk *target_cb = nullptr; command_buffer_chunk *target_cb = nullptr;
for (auto &cb : m_primary_cb_list) for (auto &cb : m_primary_cb_list)
{ {
if (cb.pending && cb.last_sync >= sync_timestamp) if (cb.last_sync >= sync_timestamp)
{ {
if (!cb.pending)
{
target_cb = nullptr;
break;
}
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync) if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
{
target_cb = &cb; target_cb = &cb;
} }
} }
}
if (target_cb) if (target_cb)
{
target_cb->wait(); target_cb->wait();
} }
}
if (has_queue_ref) if (has_queue_ref)
{ {
@ -1435,6 +1445,8 @@ void VKGSRender::end()
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id); m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer; m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
m_current_command_buffer->flags |= cb_has_occlusion_task;
} }
if (!upload_info.index_info) if (!upload_info.index_info)
@ -1486,6 +1498,7 @@ void VKGSRender::end()
close_render_pass(); close_render_pass();
vk::leave_uninterruptible(); vk::leave_uninterruptible();
m_current_command_buffer->num_draws++;
m_rtts.on_write(); m_rtts.on_write();
std::chrono::time_point<steady_clock> draw_end = steady_clock::now(); std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
@ -1834,6 +1847,22 @@ void VKGSRender::flush_command_queue(bool hard_sync)
open_command_buffer(); open_command_buffer();
} }
void VKGSRender::sync_hint(rsx::FIFO_hint hint)
{
if (hint == rsx::FIFO_hint::hint_conditional_render_eval)
{
if (m_current_command_buffer->flags & cb_has_occlusion_task)
{
// Occlusion test result evaluation is coming up, avoid a hard sync
if (!m_flush_requests.pending())
{
m_flush_requests.post(false);
m_flush_requests.remove_one();
}
}
}
}
void VKGSRender::advance_queued_frames() void VKGSRender::advance_queued_frames()
{ {
//Check all other frames for completion and clear resources //Check all other frames for completion and clear resources
@ -3289,11 +3318,14 @@ void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
//Avoid stalling later if this query is already tied to a report //Avoid stalling later if this query is already tied to a report
if (query->num_draws && query->owned && !m_flush_requests.pending()) if (query->num_draws && query->owned && !m_flush_requests.pending())
{
if (0)//m_current_command_buffer->flags & cb_has_occlusion_task)
{ {
m_flush_requests.post(false); m_flush_requests.post(false);
m_flush_requests.remove_one(); m_flush_requests.remove_one();
} }
} }
}
bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query) bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
{ {

View file

@ -47,11 +47,19 @@ namespace vk
extern u64 get_system_time(); extern u64 get_system_time();
enum command_buffer_data_flag
{
cb_has_occlusion_task = 1
};
struct command_buffer_chunk: public vk::command_buffer struct command_buffer_chunk: public vk::command_buffer
{ {
VkFence submit_fence = VK_NULL_HANDLE; VkFence submit_fence = VK_NULL_HANDLE;
VkDevice m_device = VK_NULL_HANDLE; VkDevice m_device = VK_NULL_HANDLE;
u32 num_draws = 0;
u32 flags = 0;
std::atomic_bool pending = { false }; std::atomic_bool pending = { false };
std::atomic<u64> last_sync = { 0 }; std::atomic<u64> last_sync = { 0 };
shared_mutex guard_mutex; shared_mutex guard_mutex;
@ -90,12 +98,17 @@ struct command_buffer_chunk: public vk::command_buffer
wait(); wait();
CHECK_RESULT(vkResetCommandBuffer(commands, 0)); CHECK_RESULT(vkResetCommandBuffer(commands, 0));
num_draws = 0;
flags = 0;
} }
bool poke() bool poke()
{ {
reader_lock lock(guard_mutex); reader_lock lock(guard_mutex);
if (!pending)
return true;
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS) if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
{ {
lock.upgrade(); lock.upgrade();
@ -117,14 +130,8 @@ struct command_buffer_chunk: public vk::command_buffer
if (!pending) if (!pending)
return; return;
switch(vkGetFenceStatus(m_device, submit_fence)) // NOTE: vkWaitForFences is slower than polling fence status at least on NV
{ while (vkGetFenceStatus(m_device, submit_fence) == VK_NOT_READY);
case VK_SUCCESS:
break;
case VK_NOT_READY:
CHECK_RESULT(vkWaitForFences(m_device, 1, &submit_fence, VK_TRUE, UINT64_MAX));
break;
}
lock.upgrade(); lock.upgrade();
@ -406,6 +413,8 @@ public:
void write_buffers(); void write_buffers();
void set_viewport(); void set_viewport();
void sync_hint(rsx::FIFO_hint hint) override;
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override; bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;

View file

@ -521,9 +521,10 @@ namespace rsx
return; return;
} }
rsx->sync(); // Defer conditional render evaluation
vm::ptr<CellGcmReportData> result = address_ptr; rsx->sync_hint(FIFO_hint::hint_conditional_render_eval);
rsx->conditional_render_test_failed = (result->value == 0); rsx->conditional_render_test_address = address_ptr;
rsx->conditional_render_test_failed = false;
} }
void set_zcull_render_enable(thread* rsx, u32, u32 arg) void set_zcull_render_enable(thread* rsx, u32, u32 arg)
@ -1809,8 +1810,6 @@ namespace rsx
bind<NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit>(); bind<NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit>();
bind<NV4097_SET_COLOR_MASK, nv4097::set_surface_options_dirty_bit>(); bind<NV4097_SET_COLOR_MASK, nv4097::set_surface_options_dirty_bit>();
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>(); bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
bind<NV4097_ZCULL_SYNC, nv4097::sync>();
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
bind<NV4097_INVALIDATE_L2, nv4097::set_shader_program_dirty>(); bind<NV4097_INVALIDATE_L2, nv4097::set_shader_program_dirty>();
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>(); bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>(); bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();