mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-14 18:58:36 +12:00
zcull synchronization tweaks
- Implement forced reading when calling the update method to sync partial lists
- Defer conditional render evaluation and use a read barrier to avoid extra work
- Fix the HLE gcm library when binding tiles & zcull RAM
This commit is contained in:
Parent commit: 3b47e43380
This commit: 8800c10476
6 changed files with 116 additions and 33 deletions
@ -698,6 +698,7 @@ void cellGcmSetZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart,
|
|||
zcull.sFunc = sFunc;
|
||||
zcull.sRef = sRef;
|
||||
zcull.sMask = sMask;
|
||||
zcull.binded = (zCullFormat > 0);
|
||||
|
||||
vm::_ptr<CellGcmZcullInfo>(m_config->zculls_addr)[index] = zcull.pack();
|
||||
}
|
||||
|
@ -1261,6 +1262,7 @@ s32 cellGcmSetTile(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u8 co
|
|||
tile.comp = comp;
|
||||
tile.base = base;
|
||||
tile.bank = bank;
|
||||
tile.binded = (pitch > 0);
|
||||
|
||||
vm::_ptr<CellGcmTileInfo>(m_config->tiles_addr)[index] = tile.pack();
|
||||
return CELL_OK;
|
||||
|
|
|
@ -275,6 +275,15 @@ namespace rsx
|
|||
|
||||
void thread::begin()
|
||||
{
|
||||
if (conditional_render_enabled && conditional_render_test_address)
|
||||
{
|
||||
// Evaluate conditional rendering test
|
||||
zcull_ctrl->read_barrier(this, conditional_render_test_address, 4);
|
||||
vm::ptr<CellGcmReportData> result = vm::cast(conditional_render_test_address);
|
||||
conditional_render_test_failed = (result->value == 0);
|
||||
conditional_render_test_address = 0;
|
||||
}
|
||||
|
||||
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
|
||||
in_begin_end = true;
|
||||
|
||||
|
@ -2719,7 +2728,8 @@ namespace rsx
|
|||
{
|
||||
verify(HERE), query->pending;
|
||||
|
||||
if (!result && query->num_draws)
|
||||
const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3);
|
||||
if (implemented && !result && query->num_draws)
|
||||
{
|
||||
get_occlusion_query_result(query);
|
||||
|
||||
|
@ -2784,13 +2794,23 @@ namespace rsx
|
|||
m_cycles_delay = min_zcull_cycles_delay;
|
||||
}
|
||||
|
||||
void ZCULL_control::update(::rsx::thread* ptimer)
|
||||
void ZCULL_control::update(::rsx::thread* ptimer, u32 sync_address)
|
||||
{
|
||||
m_tsc++;
|
||||
|
||||
if (m_pending_writes.empty())
|
||||
return;
|
||||
|
||||
if (!sync_address)
|
||||
{
|
||||
const auto& front = m_pending_writes.front();
|
||||
if (!front.sink || m_tsc < front.due_tsc)
|
||||
{
|
||||
// Avoid spamming backend with report status updates
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
u32 stat_tag_to_remove = m_statistics_tag_id;
|
||||
u32 processed = 0;
|
||||
for (auto &writer : m_pending_writes)
|
||||
|
@ -2810,13 +2830,21 @@ namespace rsx
|
|||
auto query = writer.query;
|
||||
u32 result = m_statistics_map[writer.counter_tag];
|
||||
|
||||
const bool force_read = (sync_address != 0);
|
||||
if (force_read && writer.sink == sync_address)
|
||||
{
|
||||
// Forced reads end here
|
||||
sync_address = 0;
|
||||
}
|
||||
|
||||
if (query)
|
||||
{
|
||||
verify(HERE), query->pending;
|
||||
|
||||
if (UNLIKELY(writer.due_tsc < m_tsc))
|
||||
const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3);
|
||||
if (force_read || writer.due_tsc < m_tsc)
|
||||
{
|
||||
if (!result && query->num_draws)
|
||||
if (implemented && !result && query->num_draws)
|
||||
{
|
||||
get_occlusion_query_result(query);
|
||||
|
||||
|
@ -2834,12 +2862,7 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
if (result || !query->num_draws)
|
||||
{
|
||||
//Not necessary to read the result anymore
|
||||
discard_occlusion_query(query);
|
||||
}
|
||||
else
|
||||
if (implemented && !result && query->num_draws)
|
||||
{
|
||||
//Maybe we get lucky and results are ready
|
||||
if (check_occlusion_query_status(query))
|
||||
|
@ -2857,6 +2880,11 @@ namespace rsx
|
|||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//Not necessary to read the result anymore
|
||||
discard_occlusion_query(query);
|
||||
}
|
||||
}
|
||||
|
||||
query->pending = false;
|
||||
|
@ -2903,14 +2931,20 @@ namespace rsx
|
|||
return;
|
||||
|
||||
const auto memory_end = memory_address + memory_range;
|
||||
u32 sync_address = 0;
|
||||
|
||||
for (const auto &writer : m_pending_writes)
|
||||
{
|
||||
if (writer.sink >= memory_address && writer.sink < memory_end)
|
||||
{
|
||||
sync(ptimer);
|
||||
return;
|
||||
sync_address = writer.sink;
|
||||
}
|
||||
}
|
||||
|
||||
if (sync_address)
|
||||
{
|
||||
update(ptimer, sync_address);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -118,6 +118,11 @@ namespace rsx
|
|||
lock_wait = 4 // Puller is processing a lock acquire
|
||||
};
|
||||
|
||||
enum FIFO_hint : u8
|
||||
{
|
||||
hint_conditional_render_eval = 1
|
||||
};
|
||||
|
||||
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
|
||||
|
||||
u32 get_address(u32 offset, u32 location);
|
||||
|
@ -230,7 +235,7 @@ namespace rsx
|
|||
queued_report_write* forwarder;
|
||||
vm::addr_t sink;
|
||||
|
||||
u32 due_tsc;
|
||||
u64 due_tsc;
|
||||
};
|
||||
|
||||
struct ZCULL_control
|
||||
|
@ -249,7 +254,7 @@ namespace rsx
|
|||
|
||||
occlusion_query_info* m_current_task = nullptr;
|
||||
u32 m_statistics_tag_id = 0;
|
||||
u32 m_tsc = 0;
|
||||
u64 m_tsc = 0;
|
||||
u32 m_cycles_delay = max_zcull_cycles_delay;
|
||||
|
||||
std::vector<queued_report_write> m_pending_writes;
|
||||
|
@ -278,8 +283,8 @@ namespace rsx
|
|||
// Conditionally sync any pending writes if range overlaps
|
||||
void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
|
||||
|
||||
// Call once every 'tick' to update
|
||||
void update(class ::rsx::thread* ptimer);
|
||||
// Call once every 'tick' to update, optional address provided to partially sync until address is processed
|
||||
void update(class ::rsx::thread* ptimer, u32 sync_address = 0);
|
||||
|
||||
// Draw call notification
|
||||
void on_draw();
|
||||
|
@ -433,6 +438,7 @@ namespace rsx
|
|||
|
||||
atomic_t<s32> async_tasks_pending{ 0 };
|
||||
|
||||
u32 conditional_render_test_address = 0;
|
||||
bool conditional_render_test_failed = false;
|
||||
bool conditional_render_enabled = false;
|
||||
bool zcull_stats_enabled = false;
|
||||
|
@ -482,6 +488,7 @@ namespace rsx
|
|||
// sync
|
||||
void sync();
|
||||
void read_barrier(u32 memory_address, u32 memory_range);
|
||||
virtual void sync_hint(FIFO_hint hint) {}
|
||||
|
||||
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
|
||||
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
||||
|
|
|
@ -816,19 +816,29 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
|||
|
||||
if (sync_timestamp > 0)
|
||||
{
|
||||
//Wait for earliest cb submitted after the sync timestamp to finish
|
||||
// Wait for earliest cb submitted after the sync timestamp to finish
|
||||
command_buffer_chunk *target_cb = nullptr;
|
||||
for (auto &cb : m_primary_cb_list)
|
||||
{
|
||||
if (cb.pending && cb.last_sync >= sync_timestamp)
|
||||
if (cb.last_sync >= sync_timestamp)
|
||||
{
|
||||
if (!cb.pending)
|
||||
{
|
||||
target_cb = nullptr;
|
||||
break;
|
||||
}
|
||||
|
||||
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
|
||||
{
|
||||
target_cb = &cb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (target_cb)
|
||||
{
|
||||
target_cb->wait();
|
||||
}
|
||||
}
|
||||
|
||||
if (has_queue_ref)
|
||||
|
@ -1435,6 +1445,8 @@ void VKGSRender::end()
|
|||
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
|
||||
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
|
||||
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
|
||||
|
||||
m_current_command_buffer->flags |= cb_has_occlusion_task;
|
||||
}
|
||||
|
||||
if (!upload_info.index_info)
|
||||
|
@ -1486,6 +1498,7 @@ void VKGSRender::end()
|
|||
close_render_pass();
|
||||
vk::leave_uninterruptible();
|
||||
|
||||
m_current_command_buffer->num_draws++;
|
||||
m_rtts.on_write();
|
||||
|
||||
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
||||
|
@ -1834,6 +1847,22 @@ void VKGSRender::flush_command_queue(bool hard_sync)
|
|||
open_command_buffer();
|
||||
}
|
||||
|
||||
void VKGSRender::sync_hint(rsx::FIFO_hint hint)
{
	// Only the conditional-render-evaluation hint is handled here; ignore anything else.
	if (hint != rsx::FIFO_hint::hint_conditional_render_eval)
		return;

	// A soft flush only helps if this command buffer actually carries occlusion query work.
	if (!(m_current_command_buffer->flags & cb_has_occlusion_task))
		return;

	// Occlusion test result evaluation is coming up; queue a flush request now to avoid
	// a hard sync when the result is read back. Skip if a request is already outstanding.
	if (!m_flush_requests.pending())
	{
		m_flush_requests.post(false);
		m_flush_requests.remove_one();
	}
}
|
||||
|
||||
void VKGSRender::advance_queued_frames()
|
||||
{
|
||||
//Check all other frames for completion and clear resources
|
||||
|
@ -3290,8 +3319,11 @@ void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
|
|||
//Avoid stalling later if this query is already tied to a report
|
||||
if (query->num_draws && query->owned && !m_flush_requests.pending())
|
||||
{
|
||||
m_flush_requests.post(false);
|
||||
m_flush_requests.remove_one();
|
||||
if (0)//m_current_command_buffer->flags & cb_has_occlusion_task)
|
||||
{
|
||||
m_flush_requests.post(false);
|
||||
m_flush_requests.remove_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -47,11 +47,19 @@ namespace vk
|
|||
|
||||
extern u64 get_system_time();
|
||||
|
||||
enum command_buffer_data_flag
|
||||
{
|
||||
cb_has_occlusion_task = 1
|
||||
};
|
||||
|
||||
struct command_buffer_chunk: public vk::command_buffer
|
||||
{
|
||||
VkFence submit_fence = VK_NULL_HANDLE;
|
||||
VkDevice m_device = VK_NULL_HANDLE;
|
||||
|
||||
u32 num_draws = 0;
|
||||
u32 flags = 0;
|
||||
|
||||
std::atomic_bool pending = { false };
|
||||
std::atomic<u64> last_sync = { 0 };
|
||||
shared_mutex guard_mutex;
|
||||
|
@ -90,12 +98,17 @@ struct command_buffer_chunk: public vk::command_buffer
|
|||
wait();
|
||||
|
||||
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
|
||||
num_draws = 0;
|
||||
flags = 0;
|
||||
}
|
||||
|
||||
bool poke()
|
||||
{
|
||||
reader_lock lock(guard_mutex);
|
||||
|
||||
if (!pending)
|
||||
return true;
|
||||
|
||||
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
|
||||
{
|
||||
lock.upgrade();
|
||||
|
@ -117,14 +130,8 @@ struct command_buffer_chunk: public vk::command_buffer
|
|||
if (!pending)
|
||||
return;
|
||||
|
||||
switch(vkGetFenceStatus(m_device, submit_fence))
|
||||
{
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_NOT_READY:
|
||||
CHECK_RESULT(vkWaitForFences(m_device, 1, &submit_fence, VK_TRUE, UINT64_MAX));
|
||||
break;
|
||||
}
|
||||
// NOTE: vkWaitForFences is slower than polling fence status at least on NV
|
||||
while (vkGetFenceStatus(m_device, submit_fence) == VK_NOT_READY);
|
||||
|
||||
lock.upgrade();
|
||||
|
||||
|
@ -406,6 +413,8 @@ public:
|
|||
void write_buffers();
|
||||
void set_viewport();
|
||||
|
||||
void sync_hint(rsx::FIFO_hint hint) override;
|
||||
|
||||
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
|
||||
|
|
|
@ -521,9 +521,10 @@ namespace rsx
|
|||
return;
|
||||
}
|
||||
|
||||
rsx->sync();
|
||||
vm::ptr<CellGcmReportData> result = address_ptr;
|
||||
rsx->conditional_render_test_failed = (result->value == 0);
|
||||
// Defer conditional render evaluation
|
||||
rsx->sync_hint(FIFO_hint::hint_conditional_render_eval);
|
||||
rsx->conditional_render_test_address = address_ptr;
|
||||
rsx->conditional_render_test_failed = false;
|
||||
}
|
||||
|
||||
void set_zcull_render_enable(thread* rsx, u32, u32 arg)
|
||||
|
@ -1809,8 +1810,6 @@ namespace rsx
|
|||
bind<NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit>();
|
||||
bind<NV4097_SET_COLOR_MASK, nv4097::set_surface_options_dirty_bit>();
|
||||
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
|
||||
bind<NV4097_ZCULL_SYNC, nv4097::sync>();
|
||||
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
|
||||
bind<NV4097_INVALIDATE_L2, nv4097::set_shader_program_dirty>();
|
||||
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
|
||||
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue