mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-06 23:11:25 +12:00
rsx: ZCULL synchronization fixes
- Track asynchronous operations in RSX core
- Add read barriers to force pending writes to finish. Fixes zcull delay flicker in all UE3 titles without forcing a hard stall
- Increase zcull latency, as all writes should be synchronized now
This commit is contained in:
parent
315798b1f4
commit
2dce55d036
6 changed files with 87 additions and 24 deletions
|
@ -1574,6 +1574,11 @@ void GLGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Discards an occlusion query on the OpenGL backend by ending the
// GL_ANY_SAMPLES_PASSED query target.
//
// query - the occlusion query being discarded. It is not read here: the
//         body only issues glEndQuery on the query target.
void GLGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
|
||||||
|
{
|
||||||
|
glEndQuery(GL_ANY_SAMPLES_PASSED);
|
||||||
|
}
|
||||||
|
|
||||||
void GLGSRender::shell_do_cleanup()
|
void GLGSRender::shell_do_cleanup()
|
||||||
{
|
{
|
||||||
//TODO: Key cleanup requests with UID to identify resources to remove
|
//TODO: Key cleanup requests with UID to identify resources to remove
|
||||||
|
|
|
@ -367,6 +367,7 @@ public:
|
||||||
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||||
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
|
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
|
||||||
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
|
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
|
||||||
|
void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void begin() override;
|
void begin() override;
|
||||||
|
|
|
@ -2092,7 +2092,7 @@ namespace rsx
|
||||||
|
|
||||||
//Reset zcull ctrl
|
//Reset zcull ctrl
|
||||||
zcull_ctrl->set_active(this, false);
|
zcull_ctrl->set_active(this, false);
|
||||||
zcull_ctrl->clear();
|
zcull_ctrl->clear(this);
|
||||||
|
|
||||||
if (zcull_ctrl->has_pending())
|
if (zcull_ctrl->has_pending())
|
||||||
{
|
{
|
||||||
|
@ -2142,7 +2142,7 @@ namespace rsx
|
||||||
if (g_cfg.video.disable_zcull_queries)
|
if (g_cfg.video.disable_zcull_queries)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
zcull_ctrl->clear();
|
zcull_ctrl->clear(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::get_zcull_stats(u32 type, vm::addr_t sink)
|
void thread::get_zcull_stats(u32 type, vm::addr_t sink)
|
||||||
|
@ -2153,18 +2153,13 @@ namespace rsx
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case CELL_GCM_ZPASS_PIXEL_CNT:
|
case CELL_GCM_ZPASS_PIXEL_CNT:
|
||||||
{
|
|
||||||
zcull_ctrl->read_report(this, sink, type);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
case CELL_GCM_ZCULL_STATS:
|
case CELL_GCM_ZCULL_STATS:
|
||||||
case CELL_GCM_ZCULL_STATS1:
|
case CELL_GCM_ZCULL_STATS1:
|
||||||
case CELL_GCM_ZCULL_STATS2:
|
case CELL_GCM_ZCULL_STATS2:
|
||||||
case CELL_GCM_ZCULL_STATS3:
|
case CELL_GCM_ZCULL_STATS3:
|
||||||
{
|
{
|
||||||
//TODO
|
zcull_ctrl->read_report(this, sink, type);
|
||||||
value = (type != CELL_GCM_ZCULL_STATS3)? UINT16_MAX : 0;
|
return;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
|
LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
|
||||||
|
@ -2181,6 +2176,14 @@ namespace rsx
|
||||||
void thread::sync()
|
void thread::sync()
|
||||||
{
|
{
|
||||||
zcull_ctrl->sync(this);
|
zcull_ctrl->sync(this);
|
||||||
|
|
||||||
|
_mm_mfence();
|
||||||
|
verify (HERE), async_tasks_pending.load() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read barrier for the range starting at memory_address and spanning
// memory_range. This is a thin forwarder: it passes this thread plus both
// arguments unchanged to zcull_ctrl->read_barrier(), which decides whether
// any pending work has to be synchronized.
//
// memory_address - start of the range that is about to be read.
// memory_range   - length of that range.
void thread::read_barrier(u32 memory_address, u32 memory_range)
|
||||||
|
{
|
||||||
|
zcull_ctrl->read_barrier(this, memory_address, memory_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::notify_zcull_info_changed()
|
void thread::notify_zcull_info_changed()
|
||||||
|
@ -2328,6 +2331,7 @@ namespace rsx
|
||||||
|
|
||||||
m_pending_writes.push_back({});
|
m_pending_writes.push_back({});
|
||||||
m_pending_writes.back().query = m_current_task;
|
m_pending_writes.back().query = m_current_task;
|
||||||
|
ptimer->async_tasks_pending++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -2342,7 +2346,7 @@ namespace rsx
|
||||||
|
|
||||||
void ZCULL_control::read_report(::rsx::thread* ptimer, vm::addr_t sink, u32 type)
|
void ZCULL_control::read_report(::rsx::thread* ptimer, vm::addr_t sink, u32 type)
|
||||||
{
|
{
|
||||||
if (m_current_task)
|
if (m_current_task && type == CELL_GCM_ZPASS_PIXEL_CNT)
|
||||||
{
|
{
|
||||||
m_current_task->owned = true;
|
m_current_task->owned = true;
|
||||||
end_occlusion_query(m_current_task);
|
end_occlusion_query(m_current_task);
|
||||||
|
@ -2384,6 +2388,8 @@ namespace rsx
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ptimer->async_tasks_pending++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZCULL_control::allocate_new_query(::rsx::thread* ptimer)
|
void ZCULL_control::allocate_new_query(::rsx::thread* ptimer)
|
||||||
|
@ -2436,7 +2442,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZCULL_control::clear()
|
void ZCULL_control::clear(class ::rsx::thread* ptimer)
|
||||||
{
|
{
|
||||||
if (!m_pending_writes.empty())
|
if (!m_pending_writes.empty())
|
||||||
{
|
{
|
||||||
|
@ -2449,6 +2455,7 @@ namespace rsx
|
||||||
discard_occlusion_query(It->query);
|
discard_occlusion_query(It->query);
|
||||||
It->query->pending = false;
|
It->query->pending = false;
|
||||||
valid_size--;
|
valid_size--;
|
||||||
|
ptimer->async_tasks_pending--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2470,9 +2477,27 @@ namespace rsx
|
||||||
m_cycles_delay = max_zcull_cycles_delay;
|
m_cycles_delay = max_zcull_cycles_delay;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZCULL_control::write(vm::addr_t sink, u32 timestamp, u32 value)
|
void ZCULL_control::write(vm::addr_t sink, u32 timestamp, u32 type, u32 value)
|
||||||
{
|
{
|
||||||
verify(HERE), sink;
|
verify(HERE), sink;
|
||||||
|
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case CELL_GCM_ZPASS_PIXEL_CNT:
|
||||||
|
value = value ? UINT16_MAX : 0;
|
||||||
|
break;
|
||||||
|
case CELL_GCM_ZCULL_STATS3:
|
||||||
|
value = value ? 0 : UINT16_MAX;
|
||||||
|
break;
|
||||||
|
case CELL_GCM_ZCULL_STATS2:
|
||||||
|
case CELL_GCM_ZCULL_STATS1:
|
||||||
|
case CELL_GCM_ZCULL_STATS:
|
||||||
|
default:
|
||||||
|
//Not implemented
|
||||||
|
value = UINT32_MAX;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
vm::ptr<CellGcmReportData> out = sink;
|
vm::ptr<CellGcmReportData> out = sink;
|
||||||
out->value = value;
|
out->value = value;
|
||||||
out->timer = timestamp;
|
out->timer = timestamp;
|
||||||
|
@ -2520,7 +2545,7 @@ namespace rsx
|
||||||
|
|
||||||
if (!writer.forwarder)
|
if (!writer.forwarder)
|
||||||
//No other queries in the chain, write result
|
//No other queries in the chain, write result
|
||||||
write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
|
write(writer.sink, ptimer->timestamp(), writer.type, result);
|
||||||
|
|
||||||
processed++;
|
processed++;
|
||||||
}
|
}
|
||||||
|
@ -2555,10 +2580,13 @@ namespace rsx
|
||||||
else
|
else
|
||||||
It = m_statistics_map.erase(It);
|
It = m_statistics_map.erase(It);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Decrement jobs counter
|
||||||
|
ptimer->async_tasks_pending -= processed;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Critical, since its likely a WAIT_FOR_IDLE type has been processed, all results are considered available
|
//Critical, since its likely a WAIT_FOR_IDLE type has been processed, all results are considered available
|
||||||
m_cycles_delay = 2;
|
m_cycles_delay = min_zcull_cycles_delay;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZCULL_control::update(::rsx::thread* ptimer)
|
void ZCULL_control::update(::rsx::thread* ptimer)
|
||||||
|
@ -2644,7 +2672,7 @@ namespace rsx
|
||||||
//only zpass supported right now
|
//only zpass supported right now
|
||||||
if (!writer.forwarder)
|
if (!writer.forwarder)
|
||||||
//No other queries in the chain, write result
|
//No other queries in the chain, write result
|
||||||
write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
|
write(writer.sink, ptimer->timestamp(), writer.type, result);
|
||||||
|
|
||||||
processed++;
|
processed++;
|
||||||
}
|
}
|
||||||
|
@ -2669,6 +2697,24 @@ namespace rsx
|
||||||
{
|
{
|
||||||
m_pending_writes.resize(0);
|
m_pending_writes.resize(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ptimer->async_tasks_pending -= processed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Conditionally synchronizes queued report writes before a memory range is read.
//
// ptimer         - RSX thread, forwarded to sync().
// memory_address - first address of the range about to be read.
// memory_range   - length of that range (presumably in bytes; the callers
//                  visible in this change pass pitch * row count - confirm).
//
// If any entry of m_pending_writes has its sink address inside
// [memory_address, memory_address + memory_range), sync() is called once and
// the scan stops. Nothing is done when no writes are queued or none match.
void ZCULL_control::read_barrier(::rsx::thread* ptimer, u32 memory_address, u32 memory_range)
{
	if (m_pending_writes.empty())
		return;

	for (const auto &writer : m_pending_writes)
	{
		const u32 sink = static_cast<u32>(writer.sink);

		// Test the offset against the length rather than forming
		// memory_address + memory_range. That sum is a u32 and wraps around
		// for ranges reaching the top of the address space, which made the
		// upper-bound comparison reject every sink and skip the barrier.
		// For non-wrapping ranges the result is identical.
		if (sink >= memory_address && (sink - memory_address) < memory_range)
		{
			sync(ptimer);
			return;
		}
	}
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -185,9 +185,8 @@ namespace rsx
|
||||||
struct ZCULL_control
|
struct ZCULL_control
|
||||||
{
|
{
|
||||||
//Delay in 'cycles' before a report update operation is forced to retire
|
//Delay in 'cycles' before a report update operation is forced to retire
|
||||||
//Larger values might give more performance but some engines (UE3) dont seem to wait for results and will flicker
|
const u32 max_zcull_cycles_delay = 128;
|
||||||
//TODO: Determine the real max delay in real hardware
|
const u32 min_zcull_cycles_delay = 16;
|
||||||
const u32 max_zcull_cycles_delay = 10;
|
|
||||||
|
|
||||||
//Number of occlusion query slots available. Real hardware actually has far fewer units before choking
|
//Number of occlusion query slots available. Real hardware actually has far fewer units before choking
|
||||||
const u32 occlusion_query_count = 128;
|
const u32 occlusion_query_count = 128;
|
||||||
|
@ -200,7 +199,7 @@ namespace rsx
|
||||||
occlusion_query_info* m_current_task = nullptr;
|
occlusion_query_info* m_current_task = nullptr;
|
||||||
u32 m_statistics_tag_id = 0;
|
u32 m_statistics_tag_id = 0;
|
||||||
u32 m_tsc = 0;
|
u32 m_tsc = 0;
|
||||||
u32 m_cycles_delay = 10;
|
u32 m_cycles_delay = max_zcull_cycles_delay;
|
||||||
|
|
||||||
std::vector<queued_report_write> m_pending_writes;
|
std::vector<queued_report_write> m_pending_writes;
|
||||||
std::unordered_map<u32, u32> m_statistics_map;
|
std::unordered_map<u32, u32> m_statistics_map;
|
||||||
|
@ -211,7 +210,7 @@ namespace rsx
|
||||||
void set_enabled(class ::rsx::thread* ptimer, bool enabled);
|
void set_enabled(class ::rsx::thread* ptimer, bool enabled);
|
||||||
void set_active(class ::rsx::thread* ptimer, bool active);
|
void set_active(class ::rsx::thread* ptimer, bool active);
|
||||||
|
|
||||||
void write(vm::addr_t sink, u32 timestamp, u32 value);
|
void write(vm::addr_t sink, u32 timestamp, u32 type, u32 value);
|
||||||
|
|
||||||
//Read current zcull statistics into the address provided
|
//Read current zcull statistics into the address provided
|
||||||
void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
|
void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
|
||||||
|
@ -220,11 +219,14 @@ namespace rsx
|
||||||
void allocate_new_query(class ::rsx::thread* ptimer);
|
void allocate_new_query(class ::rsx::thread* ptimer);
|
||||||
|
|
||||||
//clears current stat block and increments stat_tag_id
|
//clears current stat block and increments stat_tag_id
|
||||||
void clear();
|
void clear(class ::rsx::thread* ptimer);
|
||||||
|
|
||||||
//forcefully flushes all
|
//forcefully flushes all
|
||||||
void sync(class ::rsx::thread* ptimer);
|
void sync(class ::rsx::thread* ptimer);
|
||||||
|
|
||||||
|
//conditionally sync any pending writes if range overlaps
|
||||||
|
void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
|
||||||
|
|
||||||
//call once every 'tick' to update
|
//call once every 'tick' to update
|
||||||
void update(class ::rsx::thread* ptimer);
|
void update(class ::rsx::thread* ptimer);
|
||||||
|
|
||||||
|
@ -367,6 +369,8 @@ namespace rsx
|
||||||
bool sync_point_request = false;
|
bool sync_point_request = false;
|
||||||
bool in_begin_end = false;
|
bool in_begin_end = false;
|
||||||
|
|
||||||
|
atomic_t<s32> async_tasks_pending{ 0 };
|
||||||
|
|
||||||
bool conditional_render_test_failed = false;
|
bool conditional_render_test_failed = false;
|
||||||
bool conditional_render_enabled = false;
|
bool conditional_render_enabled = false;
|
||||||
bool zcull_stats_enabled = false;
|
bool zcull_stats_enabled = false;
|
||||||
|
@ -412,6 +416,7 @@ namespace rsx
|
||||||
|
|
||||||
//sync
|
//sync
|
||||||
void sync();
|
void sync();
|
||||||
|
void read_barrier(u32 memory_address, u32 memory_range);
|
||||||
|
|
||||||
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
|
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
|
||||||
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
||||||
|
|
|
@ -724,7 +724,7 @@ std::string rsx::get_method_name(const u32 id)
|
||||||
return std::string("CELL_GCM_") + found->second;
|
return std::string("CELL_GCM_") + found->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt::format("Unknown/illegal method [0x%08x]", id);
|
return fmt::format("Unknown/illegal method [0x%08x]", id << 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Various parameter pretty printing function
|
// Various parameter pretty printing function
|
||||||
|
|
|
@ -689,6 +689,9 @@ namespace rsx
|
||||||
in_pitch = in_bpp * in_w;
|
in_pitch = in_bpp * in_w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto read_address = get_address(src_offset, src_dma);
|
||||||
|
rsx->read_barrier(read_address, in_pitch * in_h);
|
||||||
|
|
||||||
if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 &&
|
if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 &&
|
||||||
dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8)
|
dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8)
|
||||||
{
|
{
|
||||||
|
@ -933,7 +936,7 @@ namespace rsx
|
||||||
|
|
||||||
namespace nv0039
|
namespace nv0039
|
||||||
{
|
{
|
||||||
void buffer_notify(thread*, u32, u32 arg)
|
void buffer_notify(thread *rsx, u32, u32 arg)
|
||||||
{
|
{
|
||||||
s32 in_pitch = method_registers.nv0039_input_pitch();
|
s32 in_pitch = method_registers.nv0039_input_pitch();
|
||||||
s32 out_pitch = method_registers.nv0039_output_pitch();
|
s32 out_pitch = method_registers.nv0039_output_pitch();
|
||||||
|
@ -968,8 +971,11 @@ namespace rsx
|
||||||
u32 dst_offset = method_registers.nv0039_output_offset();
|
u32 dst_offset = method_registers.nv0039_output_offset();
|
||||||
u32 dst_dma = method_registers.nv0039_output_location();
|
u32 dst_dma = method_registers.nv0039_output_location();
|
||||||
|
|
||||||
|
const auto read_address = get_address(src_offset, src_dma);
|
||||||
|
rsx->read_barrier(read_address, in_pitch * line_count);
|
||||||
|
|
||||||
u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma));
|
u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma));
|
||||||
const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma));
|
const u8 *src = (u8*)vm::base(read_address);
|
||||||
|
|
||||||
if (in_pitch == out_pitch && out_pitch == line_length)
|
if (in_pitch == out_pitch && out_pitch == line_length)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue