rsx: Implement dynamic vertex offset updates

- Vertex offsets can be updated mid-draw to dynamically render different meshes without breaking up draws
This commit is contained in:
kd-11 2021-06-30 02:05:25 +03:00 committed by kd-11
parent 9b050a33e4
commit 194bfc54d1
4 changed files with 130 additions and 88 deletions

View file

@ -428,39 +428,41 @@ void GLGSRender::emit_geometry(u32 sub_index)
} }
}; };
if (!sub_index) auto& draw_call = rsx::method_registers.current_draw_clause;
const rsx::flags32_t vertex_state_mask = rsx::vertex_base_changed | rsx::vertex_arrays_changed;
const rsx::flags32_t vertex_state = (sub_index == 0) ? rsx::vertex_arrays_changed : draw_call.execute_pipeline_dependencies() & vertex_state_mask;
if (vertex_state & rsx::vertex_arrays_changed)
{ {
analyse_inputs_interleaved(m_vertex_layout); analyse_inputs_interleaved(m_vertex_layout);
if (!m_vertex_layout.validate()) }
else if (vertex_state & rsx::vertex_base_changed)
{
// Rebase vertex bases instead of
for (auto& info : m_vertex_layout.interleaved_blocks)
{ {
// Execute remainining pipeline barriers with NOP draw const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
do info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
{
rsx::method_registers.current_draw_clause.execute_pipeline_dependencies();
}
while (rsx::method_registers.current_draw_clause.next());
rsx::method_registers.current_draw_clause.end();
return;
} }
} }
else
if (vertex_state && !m_vertex_layout.validate())
{ {
if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed) // No vertex inputs enabled
// Execute remainining pipeline barriers with NOP draw
do
{ {
// Rebase vertex bases instead of draw_call.execute_pipeline_dependencies();
for (auto &info : m_vertex_layout.interleaved_blocks) } while (draw_call.next());
{
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); draw_call.end();
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location); return;
}
}
} }
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
{ {
//Use approximations to reserve space. This path is mostly for debug purposes anyway //Use approximations to reserve space. This path is mostly for debug purposes anyway
u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); u32 approx_vertex_count = draw_call.get_elements_count();
u32 approx_working_buffer_size = approx_vertex_count * 256; u32 approx_working_buffer_size = approx_vertex_count * 256;
//Allocate 256K heap if we have no approximation at this time (inlined array) //Allocate 256K heap if we have no approximation at this time (inlined array)
@ -478,18 +480,18 @@ void GLGSRender::emit_geometry(u32 sub_index)
return; return;
} }
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive); const GLenum draw_mode = gl::draw_mode(draw_call.primitive);
update_vertex_env(upload_info); update_vertex_env(upload_info);
if (!upload_info.index_info) if (!upload_info.index_info)
{ {
if (rsx::method_registers.current_draw_clause.is_single_draw()) if (draw_call.is_single_draw())
{ {
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count); glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
} }
else else
{ {
const auto subranges = rsx::method_registers.current_draw_clause.get_subranges(); const auto subranges = draw_call.get_subranges();
const auto draw_count = subranges.size(); const auto draw_count = subranges.size();
const auto driver_caps = gl::get_driver_caps(); const auto driver_caps = gl::get_driver_caps();
bool use_draw_arrays_fallback = false; bool use_draw_arrays_fallback = false;
@ -542,7 +544,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
{ {
const GLenum index_type = std::get<0>(*upload_info.index_info); const GLenum index_type = std::get<0>(*upload_info.index_info);
const u32 index_offset = std::get<1>(*upload_info.index_info); const u32 index_offset = std::get<1>(*upload_info.index_info);
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive; const bool restarts_valid = gl::is_primitive_native(draw_call.primitive) && !draw_call.is_disjoint_primitive;
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
{ {
@ -551,13 +553,13 @@ void GLGSRender::emit_geometry(u32 sub_index)
m_index_ring_buffer->bind(); m_index_ring_buffer->bind();
if (rsx::method_registers.current_draw_clause.is_single_draw()) if (draw_call.is_single_draw())
{ {
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{index_offset})); glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{index_offset}));
} }
else else
{ {
const auto subranges = rsx::method_registers.current_draw_clause.get_subranges(); const auto subranges = draw_call.get_subranges();
const auto draw_count = subranges.size(); const auto draw_count = subranges.size();
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2; const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
uptr index_ptr = index_offset; uptr index_ptr = index_offset;
@ -569,7 +571,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
for (const auto &range : subranges) for (const auto &range : subranges)
{ {
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count); const auto index_size = get_index_count(draw_call.primitive, range.count);
counts[dst_index] = index_size; counts[dst_index] = index_size;
offsets[dst_index++] = reinterpret_cast<const GLvoid*>(index_ptr); offsets[dst_index++] = reinterpret_cast<const GLvoid*>(index_ptr);

View file

@ -703,34 +703,37 @@ void VKGSRender::emit_geometry(u32 sub_index)
auto &draw_call = rsx::method_registers.current_draw_clause; auto &draw_call = rsx::method_registers.current_draw_clause;
m_profiler.start(); m_profiler.start();
if (sub_index == 0) const flags32_t vertex_state_mask = rsx::vertex_base_changed | rsx::vertex_arrays_changed;
const flags32_t vertex_state = (sub_index == 0) ? rsx::vertex_arrays_changed : draw_call.execute_pipeline_dependencies() & vertex_state_mask;
if (vertex_state & rsx::vertex_arrays_changed)
{ {
analyse_inputs_interleaved(m_vertex_layout); analyse_inputs_interleaved(m_vertex_layout);
if (!m_vertex_layout.validate())
{
// No vertex inputs enabled
// Execute remainining pipeline barriers with NOP draw
do
{
draw_call.execute_pipeline_dependencies();
}
while (draw_call.next());
draw_call.end();
return;
}
} }
else if (draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed) else if (vertex_state & rsx::vertex_base_changed)
{ {
// Rebase vertex bases instead of // Rebase vertex bases instead of
for (auto &info : m_vertex_layout.interleaved_blocks) for (auto& info : m_vertex_layout.interleaved_blocks)
{ {
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location); info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
} }
} }
if (vertex_state && !m_vertex_layout.validate())
{
// No vertex inputs enabled
// Execute remainining pipeline barriers with NOP draw
do
{
draw_call.execute_pipeline_dependencies();
}
while (draw_call.next());
draw_call.end();
return;
}
const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;

View file

@ -779,6 +779,24 @@ namespace rsx
} }
} }
template<u32 index>
struct set_vertex_array_offset
{
static void impl(thread* rsx, u32 reg, u32 arg)
{
if (rsx->in_begin_end &&
!rsx::method_registers.current_draw_clause.empty() &&
reg != method_registers.register_previous_value)
{
// Revert change to queue later
method_registers.decode(reg, method_registers.register_previous_value);
// Insert offset mofifier barrier
method_registers.current_draw_clause.insert_command_barrier(vertex_array_offset_modifier_barrier, arg, index);
}
}
};
void check_index_array_dma(thread* rsx, u32 reg, u32 arg) void check_index_array_dma(thread* rsx, u32 reg, u32 arg)
{ {
// Check if either location or index type are invalid // Check if either location or index type are invalid
@ -2637,6 +2655,56 @@ namespace rsx
return registers[reg] == value; return registers[reg] == value;
} }
void draw_clause::insert_command_barrier(command_barrier_type type, u32 arg, u32 index)
{
ensure(!draw_command_ranges.empty());
auto _do_barrier_insert = [this](barrier_t&& val)
{
if (draw_command_barriers.empty() || draw_command_barriers.back() < val)
{
draw_command_barriers.push_back(val);
return;
}
for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++)
{
if (*it < val)
{
continue;
}
draw_command_barriers.insert(it, val);
break;
}
};
if (type == primitive_restart_barrier)
{
// Rasterization flow barrier
const auto& last = draw_command_ranges[current_range_index];
const auto address = last.first + last.count;
_do_barrier_insert({ current_range_index, 0, address, index, arg, 0, type });
}
else
{
// Execution dependency barrier. Requires breaking the current draw call sequence and start another.
if (draw_command_ranges.back().count > 0)
{
append_draw_command({});
}
else
{
// In case of back-to-back modifiers, do not add duplicates
current_range_index = draw_command_ranges.size() - 1;
}
_do_barrier_insert({ current_range_index, get_system_time(), ~0u, index, arg, 0, type });
last_execution_barrier_index = current_range_index;
}
}
void draw_clause::reset(primitive_type type) void draw_clause::reset(primitive_type type)
{ {
current_range_index = ~0u; current_range_index = ~0u;
@ -2676,6 +2744,11 @@ namespace rsx
method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg); method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg);
result |= vertex_base_changed; result |= vertex_base_changed;
break; break;
case vertex_array_offset_modifier_barrier:
// Change vertex array offset
method_registers.decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg);
result |= vertex_arrays_changed;
break;
default: default:
fmt::throw_exception("Unreachable"); fmt::throw_exception("Unreachable");
} }
@ -3187,6 +3260,7 @@ namespace rsx
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>(); bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>(); bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>();
bind<NV4097_SET_VERTEX_DATA_BASE_INDEX, nv4097::set_index_base_offset>(); bind<NV4097_SET_VERTEX_DATA_BASE_INDEX, nv4097::set_index_base_offset>();
bind_range<NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 1, 16, nv4097::set_vertex_array_offset>();
bind<NV4097_SET_USER_CLIP_PLANE_CONTROL, nv4097::notify_state_changed<vertex_state_dirty>>(); bind<NV4097_SET_USER_CLIP_PLANE_CONTROL, nv4097::notify_state_changed<vertex_state_dirty>>();
bind<NV4097_SET_TRANSFORM_BRANCH_BITS, nv4097::notify_state_changed<vertex_state_dirty>>(); bind<NV4097_SET_TRANSFORM_BRANCH_BITS, nv4097::notify_state_changed<vertex_state_dirty>>();
bind<NV4097_SET_CLIP_MIN, nv4097::notify_state_changed<invalidate_zclip_bits>>(); bind<NV4097_SET_CLIP_MIN, nv4097::notify_state_changed<invalidate_zclip_bits>>();

View file

@ -24,13 +24,15 @@ namespace rsx
{ {
primitive_restart_barrier, primitive_restart_barrier,
vertex_base_modifier_barrier, vertex_base_modifier_barrier,
index_base_modifier_barrier index_base_modifier_barrier,
vertex_array_offset_modifier_barrier
}; };
enum command_execution_flags : u32 enum command_execution_flags : u32
{ {
vertex_base_changed = (1 << 0), vertex_base_changed = (1 << 0),
index_base_changed = (1 << 1) index_base_changed = (1 << 1),
vertex_arrays_changed = (1 << 2),
}; };
struct barrier_t struct barrier_t
@ -39,6 +41,7 @@ namespace rsx
u64 timestamp; u64 timestamp;
u32 address; u32 address;
u32 index;
u32 arg; u32 arg;
u32 flags; u32 flags;
command_barrier_type type; command_barrier_type type;
@ -112,47 +115,7 @@ namespace rsx
simple_array<u32> inline_vertex_array{}; simple_array<u32> inline_vertex_array{};
void insert_command_barrier(command_barrier_type type, u32 arg) void insert_command_barrier(command_barrier_type type, u32 arg, u32 register_index = 0);
{
ensure(!draw_command_ranges.empty());
auto _do_barrier_insert = [this](barrier_t&& val)
{
if (draw_command_barriers.empty() || draw_command_barriers.back() < val)
{
draw_command_barriers.push_back(val);
return;
}
for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++)
{
if (*it < val)
{
continue;
}
draw_command_barriers.insert(it, val);
break;
}
};
if (type == primitive_restart_barrier)
{
// Rasterization flow barrier
const auto& last = draw_command_ranges[current_range_index];
const auto address = last.first + last.count;
_do_barrier_insert({ current_range_index, 0, address, arg, 0, type });
}
else
{
// Execution dependency barrier
append_draw_command({});
_do_barrier_insert({ current_range_index, get_system_time(), ~0u, arg, 0, type });
last_execution_barrier_index = current_range_index;
}
}
/** /**
* Optimize commands for rendering * Optimize commands for rendering