gl: Improve AMD multidraw workaround

- Reimplements the AMD workaround using an identity index buffer to avoid the performance hit of issuing multiple glDrawArrays calls for every single compiled set
- Reimplements first/count allocation using a scratch buffer to reduce allocation overhead when a large number of draw calls is used
This commit is contained in:
kd-11 2018-04-29 16:14:53 +03:00 committed by kd-11
parent eccb57d4b8
commit 1aa44ede31
2 changed files with 68 additions and 21 deletions

View file

@ -489,32 +489,33 @@ void GLGSRender::end()
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
} }
m_index_ring_buffer->bind();
if (single_draw) if (single_draw)
{ {
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
} }
else else
{ {
std::vector<GLsizei> counts;
std::vector<const GLvoid*> offsets;
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size(); const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2; const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
uintptr_t index_ptr = index_offset; uintptr_t index_ptr = index_offset;
m_scratch_buffer.resize(draw_count * 16);
counts.reserve(draw_count); GLsizei *counts = (GLsizei*)m_scratch_buffer.data();
offsets.reserve(draw_count); const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
int dst_index = 0;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{ {
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second); const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
counts.push_back(index_size); counts[dst_index] = index_size;
offsets.push_back((const GLvoid*)index_ptr); offsets[dst_index++] = (const GLvoid*)index_ptr;
index_ptr += (index_size << type_scale); index_ptr += (index_size << type_scale);
} }
glMultiDrawElements(draw_mode, counts.data(), index_type, offsets.data(), (GLsizei)draw_count); glMultiDrawElements(draw_mode, counts, index_type, offsets, (GLsizei)draw_count);
} }
} }
else else
@ -525,25 +526,36 @@ void GLGSRender::end()
} }
else else
{ {
u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; const u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
if (gl::get_driver_caps().vendor_AMD == false) bool use_draw_arrays_fallback = false;
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
const auto driver_caps = gl::get_driver_caps();
m_scratch_buffer.resize(draw_count * 24);
GLint* firsts = (GLint*)m_scratch_buffer.data();
GLsizei* counts = (GLsizei*)(firsts + draw_count);
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
int dst_index = 0;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{ {
std::vector<GLint> firsts; const GLint first = range.first - base_index;
std::vector<GLsizei> counts; const GLsizei count = range.second;
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
firsts.reserve(draw_count); firsts[dst_index] = first;
counts.reserve(draw_count); counts[dst_index] = count;
offsets[dst_index++] = (const GLvoid*)(first << 2);
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) if (driver_caps.vendor_AMD && (first + count) > (0x100000 >> 2))
{ {
firsts.push_back(range.first - base_index); //Unlikely, but added here in case the identity buffer is not large enough somehow
counts.push_back(range.second); use_draw_arrays_fallback = true;
break;
} }
glMultiDrawArrays(draw_mode, firsts.data(), counts.data(), (GLsizei)draw_count);
} }
else
if (use_draw_arrays_fallback)
{ {
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more //MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
@ -551,6 +563,17 @@ void GLGSRender::end()
glDrawArrays(draw_mode, range.first - base_index, range.second); glDrawArrays(draw_mode, range.first - base_index, range.second);
} }
} }
else if (driver_caps.vendor_AMD)
{
//Use identity index buffer to fix broken vertexID on AMD
m_identity_index_buffer->bind();
glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, (GLsizei)draw_count);
}
else
{
//Normal render
glMultiDrawArrays(draw_mode, firsts, counts, (GLsizei)draw_count);
}
} }
} }
@ -733,6 +756,21 @@ void GLGSRender::on_init_thread()
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
if (gl_caps.vendor_AMD)
{
m_identity_index_buffer.reset(new gl::buffer);
m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000);
// Initialize with 256k identity entries
auto *dst = (u32*)m_identity_index_buffer->map(gl::buffer::access::write);
for (u32 n = 0; n < (0x100000 >> 2); ++n)
{
dst[n] = n;
}
m_identity_index_buffer->unmap();
}
m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size)); m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));
m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size)); m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));
m_gl_persistent_stream_buffer->copy_from(m_persistent_stream_view); m_gl_persistent_stream_buffer->copy_from(m_persistent_stream_view);
@ -902,6 +940,11 @@ void GLGSRender::on_exit()
m_index_ring_buffer->remove(); m_index_ring_buffer->remove();
} }
if (m_identity_index_buffer)
{
m_identity_index_buffer->remove();
}
m_null_textures.clear(); m_null_textures.clear();
m_text_printer.close(); m_text_printer.close();
m_gl_texture_cache.destroy(); m_gl_texture_cache.destroy();

View file

@ -290,6 +290,9 @@ private:
std::unique_ptr<gl::ring_buffer> m_vertex_state_buffer; std::unique_ptr<gl::ring_buffer> m_vertex_state_buffer;
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer; std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
// Identity buffer used to fix broken gl_VertexID on ATI stack
std::unique_ptr<gl::buffer> m_identity_index_buffer;
u32 m_draw_calls = 0; u32 m_draw_calls = 0;
s64 m_begin_time = 0; s64 m_begin_time = 0;
s64 m_draw_time = 0; s64 m_draw_time = 0;
@ -334,6 +337,7 @@ private:
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {}; std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {}; std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
std::unordered_map<GLenum, std::unique_ptr<gl::texture>> m_null_textures; std::unordered_map<GLenum, std::unique_ptr<gl::texture>> m_null_textures;
std::vector<u8> m_scratch_buffer;
public: public:
GLGSRender(); GLGSRender();