mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-14 18:58:36 +12:00
zcull synchronization tweaks
- Implement forced reading when calling update method to sync partial lists
- Defer conditional render evaluation and use a read barrier to avoid extra work
- Fix HLE gcm library when binding tiles & zcull RAM
This commit is contained in:
parent
3b47e43380
commit
8800c10476
6 changed files with 116 additions and 33 deletions
|
@ -698,6 +698,7 @@ void cellGcmSetZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart,
|
||||||
zcull.sFunc = sFunc;
|
zcull.sFunc = sFunc;
|
||||||
zcull.sRef = sRef;
|
zcull.sRef = sRef;
|
||||||
zcull.sMask = sMask;
|
zcull.sMask = sMask;
|
||||||
|
zcull.binded = (zCullFormat > 0);
|
||||||
|
|
||||||
vm::_ptr<CellGcmZcullInfo>(m_config->zculls_addr)[index] = zcull.pack();
|
vm::_ptr<CellGcmZcullInfo>(m_config->zculls_addr)[index] = zcull.pack();
|
||||||
}
|
}
|
||||||
|
@ -1261,6 +1262,7 @@ s32 cellGcmSetTile(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u8 co
|
||||||
tile.comp = comp;
|
tile.comp = comp;
|
||||||
tile.base = base;
|
tile.base = base;
|
||||||
tile.bank = bank;
|
tile.bank = bank;
|
||||||
|
tile.binded = (pitch > 0);
|
||||||
|
|
||||||
vm::_ptr<CellGcmTileInfo>(m_config->tiles_addr)[index] = tile.pack();
|
vm::_ptr<CellGcmTileInfo>(m_config->tiles_addr)[index] = tile.pack();
|
||||||
return CELL_OK;
|
return CELL_OK;
|
||||||
|
|
|
@ -275,6 +275,15 @@ namespace rsx
|
||||||
|
|
||||||
void thread::begin()
|
void thread::begin()
|
||||||
{
|
{
|
||||||
|
if (conditional_render_enabled && conditional_render_test_address)
|
||||||
|
{
|
||||||
|
// Evaluate conditional rendering test
|
||||||
|
zcull_ctrl->read_barrier(this, conditional_render_test_address, 4);
|
||||||
|
vm::ptr<CellGcmReportData> result = vm::cast(conditional_render_test_address);
|
||||||
|
conditional_render_test_failed = (result->value == 0);
|
||||||
|
conditional_render_test_address = 0;
|
||||||
|
}
|
||||||
|
|
||||||
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
|
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
|
||||||
in_begin_end = true;
|
in_begin_end = true;
|
||||||
|
|
||||||
|
@ -2719,7 +2728,8 @@ namespace rsx
|
||||||
{
|
{
|
||||||
verify(HERE), query->pending;
|
verify(HERE), query->pending;
|
||||||
|
|
||||||
if (!result && query->num_draws)
|
const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3);
|
||||||
|
if (implemented && !result && query->num_draws)
|
||||||
{
|
{
|
||||||
get_occlusion_query_result(query);
|
get_occlusion_query_result(query);
|
||||||
|
|
||||||
|
@ -2784,13 +2794,23 @@ namespace rsx
|
||||||
m_cycles_delay = min_zcull_cycles_delay;
|
m_cycles_delay = min_zcull_cycles_delay;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZCULL_control::update(::rsx::thread* ptimer)
|
void ZCULL_control::update(::rsx::thread* ptimer, u32 sync_address)
|
||||||
{
|
{
|
||||||
m_tsc++;
|
m_tsc++;
|
||||||
|
|
||||||
if (m_pending_writes.empty())
|
if (m_pending_writes.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (!sync_address)
|
||||||
|
{
|
||||||
|
const auto& front = m_pending_writes.front();
|
||||||
|
if (!front.sink || m_tsc < front.due_tsc)
|
||||||
|
{
|
||||||
|
// Avoid spamming backend with report status updates
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
u32 stat_tag_to_remove = m_statistics_tag_id;
|
u32 stat_tag_to_remove = m_statistics_tag_id;
|
||||||
u32 processed = 0;
|
u32 processed = 0;
|
||||||
for (auto &writer : m_pending_writes)
|
for (auto &writer : m_pending_writes)
|
||||||
|
@ -2810,13 +2830,21 @@ namespace rsx
|
||||||
auto query = writer.query;
|
auto query = writer.query;
|
||||||
u32 result = m_statistics_map[writer.counter_tag];
|
u32 result = m_statistics_map[writer.counter_tag];
|
||||||
|
|
||||||
|
const bool force_read = (sync_address != 0);
|
||||||
|
if (force_read && writer.sink == sync_address)
|
||||||
|
{
|
||||||
|
// Forced reads end here
|
||||||
|
sync_address = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (query)
|
if (query)
|
||||||
{
|
{
|
||||||
verify(HERE), query->pending;
|
verify(HERE), query->pending;
|
||||||
|
|
||||||
if (UNLIKELY(writer.due_tsc < m_tsc))
|
const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3);
|
||||||
|
if (force_read || writer.due_tsc < m_tsc)
|
||||||
{
|
{
|
||||||
if (!result && query->num_draws)
|
if (implemented && !result && query->num_draws)
|
||||||
{
|
{
|
||||||
get_occlusion_query_result(query);
|
get_occlusion_query_result(query);
|
||||||
|
|
||||||
|
@ -2834,12 +2862,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (result || !query->num_draws)
|
if (implemented && !result && query->num_draws)
|
||||||
{
|
|
||||||
//Not necessary to read the result anymore
|
|
||||||
discard_occlusion_query(query);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
//Maybe we get lucky and results are ready
|
//Maybe we get lucky and results are ready
|
||||||
if (check_occlusion_query_status(query))
|
if (check_occlusion_query_status(query))
|
||||||
|
@ -2857,6 +2880,11 @@ namespace rsx
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//Not necessary to read the result anymore
|
||||||
|
discard_occlusion_query(query);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
query->pending = false;
|
query->pending = false;
|
||||||
|
@ -2903,14 +2931,20 @@ namespace rsx
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const auto memory_end = memory_address + memory_range;
|
const auto memory_end = memory_address + memory_range;
|
||||||
|
u32 sync_address = 0;
|
||||||
|
|
||||||
for (const auto &writer : m_pending_writes)
|
for (const auto &writer : m_pending_writes)
|
||||||
{
|
{
|
||||||
if (writer.sink >= memory_address && writer.sink < memory_end)
|
if (writer.sink >= memory_address && writer.sink < memory_end)
|
||||||
{
|
{
|
||||||
sync(ptimer);
|
sync_address = writer.sink;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sync_address)
|
||||||
|
{
|
||||||
|
update(ptimer, sync_address);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -118,6 +118,11 @@ namespace rsx
|
||||||
lock_wait = 4 // Puller is processing a lock acquire
|
lock_wait = 4 // Puller is processing a lock acquire
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum FIFO_hint : u8
|
||||||
|
{
|
||||||
|
hint_conditional_render_eval = 1
|
||||||
|
};
|
||||||
|
|
||||||
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
|
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
|
||||||
|
|
||||||
u32 get_address(u32 offset, u32 location);
|
u32 get_address(u32 offset, u32 location);
|
||||||
|
@ -230,7 +235,7 @@ namespace rsx
|
||||||
queued_report_write* forwarder;
|
queued_report_write* forwarder;
|
||||||
vm::addr_t sink;
|
vm::addr_t sink;
|
||||||
|
|
||||||
u32 due_tsc;
|
u64 due_tsc;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ZCULL_control
|
struct ZCULL_control
|
||||||
|
@ -249,7 +254,7 @@ namespace rsx
|
||||||
|
|
||||||
occlusion_query_info* m_current_task = nullptr;
|
occlusion_query_info* m_current_task = nullptr;
|
||||||
u32 m_statistics_tag_id = 0;
|
u32 m_statistics_tag_id = 0;
|
||||||
u32 m_tsc = 0;
|
u64 m_tsc = 0;
|
||||||
u32 m_cycles_delay = max_zcull_cycles_delay;
|
u32 m_cycles_delay = max_zcull_cycles_delay;
|
||||||
|
|
||||||
std::vector<queued_report_write> m_pending_writes;
|
std::vector<queued_report_write> m_pending_writes;
|
||||||
|
@ -278,8 +283,8 @@ namespace rsx
|
||||||
// Conditionally sync any pending writes if range overlaps
|
// Conditionally sync any pending writes if range overlaps
|
||||||
void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
|
void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
|
||||||
|
|
||||||
// Call once every 'tick' to update
|
// Call once every 'tick' to update, optional address provided to partially sync until address is processed
|
||||||
void update(class ::rsx::thread* ptimer);
|
void update(class ::rsx::thread* ptimer, u32 sync_address = 0);
|
||||||
|
|
||||||
// Draw call notification
|
// Draw call notification
|
||||||
void on_draw();
|
void on_draw();
|
||||||
|
@ -433,6 +438,7 @@ namespace rsx
|
||||||
|
|
||||||
atomic_t<s32> async_tasks_pending{ 0 };
|
atomic_t<s32> async_tasks_pending{ 0 };
|
||||||
|
|
||||||
|
u32 conditional_render_test_address = 0;
|
||||||
bool conditional_render_test_failed = false;
|
bool conditional_render_test_failed = false;
|
||||||
bool conditional_render_enabled = false;
|
bool conditional_render_enabled = false;
|
||||||
bool zcull_stats_enabled = false;
|
bool zcull_stats_enabled = false;
|
||||||
|
@ -482,6 +488,7 @@ namespace rsx
|
||||||
// sync
|
// sync
|
||||||
void sync();
|
void sync();
|
||||||
void read_barrier(u32 memory_address, u32 memory_range);
|
void read_barrier(u32 memory_address, u32 memory_range);
|
||||||
|
virtual void sync_hint(FIFO_hint hint) {}
|
||||||
|
|
||||||
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
|
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
|
||||||
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
||||||
|
|
|
@ -816,20 +816,30 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||||
|
|
||||||
if (sync_timestamp > 0)
|
if (sync_timestamp > 0)
|
||||||
{
|
{
|
||||||
//Wait for earliest cb submitted after the sync timestamp to finish
|
// Wait for earliest cb submitted after the sync timestamp to finish
|
||||||
command_buffer_chunk *target_cb = nullptr;
|
command_buffer_chunk *target_cb = nullptr;
|
||||||
for (auto &cb : m_primary_cb_list)
|
for (auto &cb : m_primary_cb_list)
|
||||||
{
|
{
|
||||||
if (cb.pending && cb.last_sync >= sync_timestamp)
|
if (cb.last_sync >= sync_timestamp)
|
||||||
{
|
{
|
||||||
|
if (!cb.pending)
|
||||||
|
{
|
||||||
|
target_cb = nullptr;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
|
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
|
||||||
|
{
|
||||||
target_cb = &cb;
|
target_cb = &cb;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (target_cb)
|
if (target_cb)
|
||||||
|
{
|
||||||
target_cb->wait();
|
target_cb->wait();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (has_queue_ref)
|
if (has_queue_ref)
|
||||||
{
|
{
|
||||||
|
@ -1435,6 +1445,8 @@ void VKGSRender::end()
|
||||||
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
|
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
|
||||||
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
|
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
|
||||||
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
|
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
|
||||||
|
|
||||||
|
m_current_command_buffer->flags |= cb_has_occlusion_task;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!upload_info.index_info)
|
if (!upload_info.index_info)
|
||||||
|
@ -1486,6 +1498,7 @@ void VKGSRender::end()
|
||||||
close_render_pass();
|
close_render_pass();
|
||||||
vk::leave_uninterruptible();
|
vk::leave_uninterruptible();
|
||||||
|
|
||||||
|
m_current_command_buffer->num_draws++;
|
||||||
m_rtts.on_write();
|
m_rtts.on_write();
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
||||||
|
@ -1834,6 +1847,22 @@ void VKGSRender::flush_command_queue(bool hard_sync)
|
||||||
open_command_buffer();
|
open_command_buffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VKGSRender::sync_hint(rsx::FIFO_hint hint)
|
||||||
|
{
|
||||||
|
if (hint == rsx::FIFO_hint::hint_conditional_render_eval)
|
||||||
|
{
|
||||||
|
if (m_current_command_buffer->flags & cb_has_occlusion_task)
|
||||||
|
{
|
||||||
|
// Occlusion test result evaluation is coming up, avoid a hard sync
|
||||||
|
if (!m_flush_requests.pending())
|
||||||
|
{
|
||||||
|
m_flush_requests.post(false);
|
||||||
|
m_flush_requests.remove_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void VKGSRender::advance_queued_frames()
|
void VKGSRender::advance_queued_frames()
|
||||||
{
|
{
|
||||||
//Check all other frames for completion and clear resources
|
//Check all other frames for completion and clear resources
|
||||||
|
@ -3289,10 +3318,13 @@ void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
|
||||||
|
|
||||||
//Avoid stalling later if this query is already tied to a report
|
//Avoid stalling later if this query is already tied to a report
|
||||||
if (query->num_draws && query->owned && !m_flush_requests.pending())
|
if (query->num_draws && query->owned && !m_flush_requests.pending())
|
||||||
|
{
|
||||||
|
if (0)//m_current_command_buffer->flags & cb_has_occlusion_task)
|
||||||
{
|
{
|
||||||
m_flush_requests.post(false);
|
m_flush_requests.post(false);
|
||||||
m_flush_requests.remove_one();
|
m_flush_requests.remove_one();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
|
bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
|
||||||
|
|
|
@ -47,11 +47,19 @@ namespace vk
|
||||||
|
|
||||||
extern u64 get_system_time();
|
extern u64 get_system_time();
|
||||||
|
|
||||||
|
enum command_buffer_data_flag
|
||||||
|
{
|
||||||
|
cb_has_occlusion_task = 1
|
||||||
|
};
|
||||||
|
|
||||||
struct command_buffer_chunk: public vk::command_buffer
|
struct command_buffer_chunk: public vk::command_buffer
|
||||||
{
|
{
|
||||||
VkFence submit_fence = VK_NULL_HANDLE;
|
VkFence submit_fence = VK_NULL_HANDLE;
|
||||||
VkDevice m_device = VK_NULL_HANDLE;
|
VkDevice m_device = VK_NULL_HANDLE;
|
||||||
|
|
||||||
|
u32 num_draws = 0;
|
||||||
|
u32 flags = 0;
|
||||||
|
|
||||||
std::atomic_bool pending = { false };
|
std::atomic_bool pending = { false };
|
||||||
std::atomic<u64> last_sync = { 0 };
|
std::atomic<u64> last_sync = { 0 };
|
||||||
shared_mutex guard_mutex;
|
shared_mutex guard_mutex;
|
||||||
|
@ -90,12 +98,17 @@ struct command_buffer_chunk: public vk::command_buffer
|
||||||
wait();
|
wait();
|
||||||
|
|
||||||
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
|
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
|
||||||
|
num_draws = 0;
|
||||||
|
flags = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool poke()
|
bool poke()
|
||||||
{
|
{
|
||||||
reader_lock lock(guard_mutex);
|
reader_lock lock(guard_mutex);
|
||||||
|
|
||||||
|
if (!pending)
|
||||||
|
return true;
|
||||||
|
|
||||||
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
|
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
|
||||||
{
|
{
|
||||||
lock.upgrade();
|
lock.upgrade();
|
||||||
|
@ -117,14 +130,8 @@ struct command_buffer_chunk: public vk::command_buffer
|
||||||
if (!pending)
|
if (!pending)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
switch(vkGetFenceStatus(m_device, submit_fence))
|
// NOTE: vkWaitForFences is slower than polling fence status at least on NV
|
||||||
{
|
while (vkGetFenceStatus(m_device, submit_fence) == VK_NOT_READY);
|
||||||
case VK_SUCCESS:
|
|
||||||
break;
|
|
||||||
case VK_NOT_READY:
|
|
||||||
CHECK_RESULT(vkWaitForFences(m_device, 1, &submit_fence, VK_TRUE, UINT64_MAX));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
lock.upgrade();
|
lock.upgrade();
|
||||||
|
|
||||||
|
@ -406,6 +413,8 @@ public:
|
||||||
void write_buffers();
|
void write_buffers();
|
||||||
void set_viewport();
|
void set_viewport();
|
||||||
|
|
||||||
|
void sync_hint(rsx::FIFO_hint hint) override;
|
||||||
|
|
||||||
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||||
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||||
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
|
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
|
||||||
|
|
|
@ -521,9 +521,10 @@ namespace rsx
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
rsx->sync();
|
// Defer conditional render evaluation
|
||||||
vm::ptr<CellGcmReportData> result = address_ptr;
|
rsx->sync_hint(FIFO_hint::hint_conditional_render_eval);
|
||||||
rsx->conditional_render_test_failed = (result->value == 0);
|
rsx->conditional_render_test_address = address_ptr;
|
||||||
|
rsx->conditional_render_test_failed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_zcull_render_enable(thread* rsx, u32, u32 arg)
|
void set_zcull_render_enable(thread* rsx, u32, u32 arg)
|
||||||
|
@ -1809,8 +1810,6 @@ namespace rsx
|
||||||
bind<NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit>();
|
bind<NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit>();
|
||||||
bind<NV4097_SET_COLOR_MASK, nv4097::set_surface_options_dirty_bit>();
|
bind<NV4097_SET_COLOR_MASK, nv4097::set_surface_options_dirty_bit>();
|
||||||
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
|
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
|
||||||
bind<NV4097_ZCULL_SYNC, nv4097::sync>();
|
|
||||||
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
|
|
||||||
bind<NV4097_INVALIDATE_L2, nv4097::set_shader_program_dirty>();
|
bind<NV4097_INVALIDATE_L2, nv4097::set_shader_program_dirty>();
|
||||||
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
|
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
|
||||||
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
|
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue