mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 14:31:24 +12:00
gl: Add support for hardware instancing
This commit is contained in:
parent
3d3fc2f3cd
commit
65c0d3d425
6 changed files with 133 additions and 52 deletions
|
@ -599,7 +599,11 @@ void GLGSRender::emit_geometry(u32 sub_index)
|
|||
|
||||
if (!upload_info.index_info)
|
||||
{
|
||||
if (draw_call.is_single_draw())
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
glDrawArraysInstanced(draw_mode, 0, upload_info.vertex_draw_count, draw_call.pass_count());
|
||||
}
|
||||
else if (draw_call.is_single_draw())
|
||||
{
|
||||
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
|
||||
}
|
||||
|
@ -667,7 +671,11 @@ void GLGSRender::emit_geometry(u32 sub_index)
|
|||
|
||||
m_index_ring_buffer->bind();
|
||||
|
||||
if (draw_call.is_single_draw())
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
glDrawElementsInstanced(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{ index_offset }), draw_call.pass_count());
|
||||
}
|
||||
else if (draw_call.is_single_draw())
|
||||
{
|
||||
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{index_offset}));
|
||||
}
|
||||
|
@ -781,13 +789,20 @@ void GLGSRender::end()
|
|||
m_program->validate();
|
||||
}
|
||||
|
||||
rsx::method_registers.current_draw_clause.begin();
|
||||
auto& draw_call = REGS(m_ctx)->current_draw_clause;
|
||||
draw_call.begin();
|
||||
u32 subdraw = 0u;
|
||||
do
|
||||
{
|
||||
emit_geometry(subdraw++);
|
||||
|
||||
if (draw_call.is_trivial_instanced_draw)
|
||||
{
|
||||
// We already completed. End the draw.
|
||||
draw_call.end();
|
||||
}
|
||||
while (rsx::method_registers.current_draw_clause.next());
|
||||
}
|
||||
while (draw_call.next());
|
||||
|
||||
m_rtts.on_write(m_framebuffer_layout.color_write_enabled, m_framebuffer_layout.zeta_write_enabled);
|
||||
|
||||
|
|
|
@ -296,6 +296,7 @@ void GLGSRender::on_init_thread()
|
|||
m_fragment_instructions_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
m_raster_env_ring_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
m_scratch_ring_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
m_instancing_ring_buffer = std::make_unique<gl::legacy_ring_buffer>();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -311,6 +312,7 @@ void GLGSRender::on_init_thread()
|
|||
m_fragment_instructions_buffer = std::make_unique<gl::ring_buffer>();
|
||||
m_raster_env_ring_buffer = std::make_unique<gl::ring_buffer>();
|
||||
m_scratch_ring_buffer = std::make_unique<gl::ring_buffer>();
|
||||
m_instancing_ring_buffer = std::make_unique<gl::ring_buffer>();
|
||||
}
|
||||
|
||||
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
|
||||
|
@ -323,6 +325,7 @@ void GLGSRender::on_init_thread()
|
|||
m_vertex_layout_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_raster_env_ring_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_scratch_ring_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_instancing_ring_buffer->create(gl::buffer::target::ssbo, 64 * 0x100000);
|
||||
|
||||
if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only)
|
||||
{
|
||||
|
@ -547,6 +550,11 @@ void GLGSRender::on_exit()
|
|||
m_scratch_ring_buffer->remove();
|
||||
}
|
||||
|
||||
if (m_instancing_ring_buffer)
|
||||
{
|
||||
m_instancing_ring_buffer->remove();
|
||||
}
|
||||
|
||||
m_null_textures.clear();
|
||||
m_gl_texture_cache.destroy();
|
||||
m_ui_renderer.destroy();
|
||||
|
@ -866,7 +874,8 @@ void GLGSRender::load_program_env()
|
|||
const bool update_fragment_env = m_graphics_state & rsx::pipeline_state::fragment_state_dirty;
|
||||
const bool update_fragment_texture_env = m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty;
|
||||
const bool update_instruction_buffers = !!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program);
|
||||
const bool update_raster_env = rsx::method_registers.polygon_stipple_enabled() && (m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty);
|
||||
const bool update_raster_env = REGS(m_ctx)->polygon_stipple_enabled() && (m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty);
|
||||
const bool update_instancing_data = REGS(m_ctx)->current_draw_clause.is_trivial_instanced_draw;
|
||||
|
||||
if (manually_flush_ring_buffers)
|
||||
{
|
||||
|
@ -876,6 +885,7 @@ void GLGSRender::load_program_env()
|
|||
if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(utils::align(fragment_constants_size, 256));
|
||||
if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
|
||||
if (update_raster_env) m_raster_env_ring_buffer->reserve_storage_on_heap(128);
|
||||
if (update_instancing_data) m_instancing_ring_buffer->reserve_storage_on_heap(8192 * REGS(m_ctx)->current_draw_clause.pass_count());
|
||||
|
||||
if (update_instruction_buffers)
|
||||
{
|
||||
|
@ -899,6 +909,33 @@ void GLGSRender::load_program_env()
|
|||
m_vertex_env_buffer->bind_range(GL_VERTEX_PARAMS_BIND_SLOT, mapping.second, 144);
|
||||
}
|
||||
|
||||
if (update_instancing_data)
|
||||
{
|
||||
// Combines transform load + instancing lookup table
|
||||
const auto alignment = m_min_ssbo_alignment;
|
||||
u32 indirection_table_offset = 0;
|
||||
u32 constants_data_table_offset = 0;
|
||||
|
||||
rsx::io_buffer indirection_table_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
const auto mapping = m_instancing_ring_buffer->alloc_from_heap(static_cast<u32>(size), alignment);
|
||||
indirection_table_offset = mapping.second;
|
||||
return mapping;
|
||||
});
|
||||
|
||||
rsx::io_buffer constants_array_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
const auto mapping = m_instancing_ring_buffer->alloc_from_heap(static_cast<u32>(size), alignment);
|
||||
constants_data_table_offset = mapping.second;
|
||||
return mapping;
|
||||
});
|
||||
|
||||
m_draw_processor.fill_constants_instancing_buffer(indirection_table_buf, constants_array_buf, m_vertex_prog);
|
||||
|
||||
m_instancing_ring_buffer->bind_range(GL_INSTANCING_LUT_BIND_SLOT, indirection_table_offset, ::size32(indirection_table_buf));
|
||||
m_instancing_ring_buffer->bind_range(GL_INSTANCING_XFORM_CONSTANTS_SLOT, constants_data_table_offset, ::size32(constants_array_buf));
|
||||
}
|
||||
|
||||
if (update_transform_constants)
|
||||
{
|
||||
// Vertex constants
|
||||
|
@ -1011,6 +1048,7 @@ void GLGSRender::load_program_env()
|
|||
if (update_fragment_constants) m_fragment_constants_buffer->unmap();
|
||||
if (update_transform_constants) m_transform_constants_buffer->unmap();
|
||||
if (update_raster_env) m_raster_env_ring_buffer->unmap();
|
||||
if (update_instancing_data) m_instancing_ring_buffer->unmap();
|
||||
|
||||
if (update_instruction_buffers)
|
||||
{
|
||||
|
|
|
@ -105,6 +105,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
|
|||
std::unique_ptr<gl::ring_buffer> m_vertex_instructions_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_fragment_instructions_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_raster_env_ring_buffer;
|
||||
// Ring buffer backing the instancing storage blocks. on_init_thread() creates it with
// gl::buffer::target::ssbo, and load_program_env() sub-allocates both the indirection
// table and the instanced constants array from it before binding each range to
// GL_INSTANCING_LUT_BIND_SLOT / GL_INSTANCING_XFORM_CONSTANTS_SLOT.
std::unique_ptr<gl::ring_buffer> m_instancing_ring_buffer;
|
||||
|
||||
// Identity buffer used to fix broken gl_VertexID on ATI stack
|
||||
std::unique_ptr<gl::buffer> m_identity_index_buffer;
|
||||
|
@ -117,6 +118,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
|
|||
|
||||
GLint m_min_texbuffer_alignment = 256;
|
||||
GLint m_uniform_buffer_offset_align = 256;
|
||||
// Alignment handed to alloc_from_heap() when load_program_env() carves ranges out of
// the instancing ring buffer. Defaults to 256.
// NOTE(review): the place where this is refreshed from the driver is not visible here;
// presumably it mirrors the SSBO offset alignment limit - confirm.
GLint m_min_ssbo_alignment = 256;
|
||||
GLint m_max_texbuffer_size = 65536;
|
||||
|
||||
bool manually_flush_ring_buffers = false;
|
||||
|
|
|
@ -183,6 +183,9 @@ OPENGL_PROC(PFNGLUNMAPNAMEDBUFFEREXTPROC, UnmapNamedBufferEXT);
|
|||
OPENGL_PROC(PFNGLMULTIDRAWELEMENTSPROC, MultiDrawElements);
|
||||
OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);
|
||||
|
||||
// Instanced draw entry points. emit_geometry() issues glDrawArraysInstanced for
// non-indexed and glDrawElementsInstanced for indexed draws when the draw clause is
// flagged is_trivial_instanced_draw, passing pass_count() as the instance count.
// NOTE(review): presumably OPENGL_PROC prepends the `gl` prefix to the second argument
// when declaring the loaded pointer - confirm against the macro definition.
OPENGL_PROC(PFNGLDRAWARRAYSINSTANCEDPROC, DrawArraysInstanced);
|
||||
OPENGL_PROC(PFNGLDRAWELEMENTSINSTANCEDPROC, DrawElementsInstanced);
|
||||
|
||||
OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT);
|
||||
OPENGL_PROC(PFNGLGETTEXTUREIMAGEPROC, GetTextureImage);
|
||||
OPENGL_PROC(PFNGLGETTEXTURESUBIMAGEPROC, GetTextureSubImage);
|
||||
|
|
|
@ -28,24 +28,25 @@ std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::stri
|
|||
|
||||
// Writes the GLSL vertex shader preamble to OS: the "#version 430" directive followed
// by two std140 uniform blocks, VertexContextBuffer (bound at
// GL_VERTEX_PARAMS_BIND_SLOT) and VertexLayoutBuffer (bound at
// GL_VERTEX_LAYOUT_BIND_SLOT).
// NOTE(review): the same preamble text appears twice in this span, once as individual
// `OS << "...";` statements and once as a single chained `OS <<` expression built from
// adjacent string literals. The two forms look like both sides of one change rendered
// together; confirm which one is live before editing the shader text.
void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
|
||||
{
|
||||
OS << "#version 430\n";
|
||||
OS << "layout(std140, binding = " << GL_VERTEX_PARAMS_BIND_SLOT << ") uniform VertexContextBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " mat4 scale_offset_mat;\n";
|
||||
OS << " ivec4 user_clip_enabled[2];\n";
|
||||
OS << " vec4 user_clip_factor[2];\n";
|
||||
OS << " uint transform_branch_bits;\n";
|
||||
OS << " float point_size;\n";
|
||||
OS << " float z_near;\n";
|
||||
OS << " float z_far;\n";
|
||||
OS << "};\n\n";
|
||||
OS <<
|
||||
"#version 430\n"
|
||||
"layout(std140, binding = " << GL_VERTEX_PARAMS_BIND_SLOT << ") uniform VertexContextBuffer\n"
|
||||
"{\n"
|
||||
" mat4 scale_offset_mat;\n"
|
||||
" ivec4 user_clip_enabled[2];\n"
|
||||
" vec4 user_clip_factor[2];\n"
|
||||
" uint transform_branch_bits;\n"
|
||||
" float point_size;\n"
|
||||
" float z_near;\n"
|
||||
" float z_far;\n"
|
||||
"};\n\n"
|
||||
|
||||
OS << "layout(std140, binding = " << GL_VERTEX_LAYOUT_BIND_SLOT << ") uniform VertexLayoutBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " uint vertex_base_index;\n";
|
||||
OS << " uint vertex_index_offset;\n";
|
||||
OS << " uvec4 input_attributes_blob[16 / 2];\n";
|
||||
OS << "};\n\n";
|
||||
"layout(std140, binding = " << GL_VERTEX_LAYOUT_BIND_SLOT << ") uniform VertexLayoutBuffer\n"
|
||||
"{\n"
|
||||
" uint vertex_base_index;\n"
|
||||
" uint vertex_index_offset;\n"
|
||||
" uvec4 input_attributes_blob[16 / 2];\n"
|
||||
"};\n\n";
|
||||
}
|
||||
|
||||
void GLVertexDecompilerThread::insertInputs(std::stringstream& OS, const std::vector<ParamType>& /*inputs*/)
|
||||
|
@ -62,10 +63,29 @@ void GLVertexDecompilerThread::insertConstants(std::stringstream& OS, const std:
|
|||
{
|
||||
if (PI.name.starts_with("vc["))
|
||||
{
|
||||
OS << "layout(std140, binding = " << GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT << ") uniform VertexConstantsBuffer\n";
|
||||
OS << "{\n";
|
||||
OS << " vec4 " << PI.name << ";\n";
|
||||
OS << "};\n\n";
|
||||
if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS))
|
||||
{
|
||||
OS <<
|
||||
"layout(std140, binding = " << GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT << ") uniform VertexConstantsBuffer\n"
|
||||
"{\n"
|
||||
" vec4 " << PI.name << ";\n"
|
||||
"};\n\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
OS <<
|
||||
"layout(std430, binding = " << GL_INSTANCING_LUT_BIND_SLOT << ") readonly buffer InstancingIndirectionLUT\n"
|
||||
"{\n"
|
||||
" int constants_addressing_lookup[];\n"
|
||||
"};\n\n"
|
||||
|
||||
"layout(std430, binding = " << GL_INSTANCING_XFORM_CONSTANTS_SLOT << ") readonly buffer InstancingVertexConstantsBlock\n"
|
||||
"{\n"
|
||||
" vec4 instanced_constants_array[];\n"
|
||||
"};\n\n"
|
||||
|
||||
"#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 468 : ::size32(m_constant_ids)) << "\n\n";
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
@ -152,6 +172,7 @@ void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
|
|||
properties2.emulate_depth_clip_only = dev_caps.NV_depth_buffer_float_supported;
|
||||
properties2.low_precision_tests = dev_caps.vendor_NVIDIA;
|
||||
properties2.require_explicit_invariance = dev_caps.vendor_MESA || (dev_caps.vendor_NVIDIA && g_cfg.video.shader_precision != gpu_preset_level::low);
|
||||
properties2.require_instanced_render = !!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS);
|
||||
|
||||
insert_glsl_legacy_function(OS, properties2);
|
||||
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false);
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#define GL_RASTERIZER_STATE_BIND_SLOT UBO_SLOT(6)
|
||||
#define GL_INTERPRETER_VERTEX_BLOCK SSBO_SLOT(0)
|
||||
#define GL_INTERPRETER_FRAGMENT_BLOCK SSBO_SLOT(1)
|
||||
// Storage-block slot for the instancing indirection table. The vertex decompiler emits
// it as the std430 block InstancingIndirectionLUT, and load_program_env() binds the
// indirection table range of the instancing ring buffer here.
#define GL_INSTANCING_LUT_BIND_SLOT SSBO_SLOT(2)
|
||||
// Storage-block slot for the instanced vertex constants array (std430 block
// InstancingVertexConstantsBlock in the generated vertex shader).
#define GL_INSTANCING_XFORM_CONSTANTS_SLOT SSBO_SLOT(3)
|
||||
// Storage-block slot for compute buffer number `index`.
// NOTE(review): GL_COMPUTE_BUFFER_SLOT(0) and GL_COMPUTE_BUFFER_SLOT(1) expand to
// SSBO_SLOT(2 + 0) and SSBO_SLOT(2 + 1), the same arguments as the two instancing slots
// above - confirm that compute bindings and instanced-draw bindings are never expected
// to be live at the same time.
// NOTE(review): `index` is substituted unparenthesised; whether an expression argument
// is safe depends on how SSBO_SLOT is defined, which is not visible here.
#define GL_COMPUTE_BUFFER_SLOT(index) SSBO_SLOT(2 + index)
|
||||
#define GL_COMPUTE_IMAGE_SLOT(index) SSBO_SLOT(index)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue