From 6b23e733d01346402e85556ff35720bef588681f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 22 Feb 2018 11:13:01 +0300 Subject: [PATCH] rsx/gl/vk: Improvements - gl: Do not call makeCurrent every flip - it is already called in set_current() - gl: Improve ring buffer behaviour; use sliding window to view buffers larger than maximum viewable hardware range NV hardware can only view 128M at a time - gl/vk: Bump transform constant heap size When lots of draw calls are issued, the heap is exhausted very fast (8k per draw) - gl: Remove CLIENT_STORAGE_BIT from ring buffers. Performance is marginally better without this flag (at least on Windows) --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 16 ++-- rpcs3/Emu/RSX/GL/GLGSRender.h | 2 + rpcs3/Emu/RSX/GL/GLHelpers.h | 111 +++++++++++++++++++++------ rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp | 20 +++++ rpcs3/Emu/RSX/VK/VKGSRender.h | 2 +- rpcs3/rpcs3qt/gl_gs_frame.cpp | 1 - 6 files changed, 120 insertions(+), 32 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 8f4d2d6ffe..d151e9ffc7 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -736,14 +736,16 @@ void GLGSRender::on_init_thread() m_index_ring_buffer.reset(new gl::ring_buffer()); } - m_attrib_ring_buffer->create(gl::buffer::target::texture, std::min(m_max_texbuffer_size, 256 * 0x100000)); - m_index_ring_buffer->create(gl::buffer::target::element_array, std::min(m_max_texbuffer_size, 64 * 0x100000)); - m_transform_constants_buffer->create(gl::buffer::target::uniform, std::min(m_max_texbuffer_size, 16 * 0x100000)); - m_fragment_constants_buffer->create(gl::buffer::target::uniform, std::min(m_max_texbuffer_size, 16 * 0x100000)); - m_vertex_state_buffer->create(gl::buffer::target::uniform, std::min(m_max_texbuffer_size, 16 * 0x100000)); + m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000); + m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000); + 
m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000); + m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); - m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size()); - m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size()); + m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, m_max_texbuffer_size); + m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, m_max_texbuffer_size); + m_gl_persistent_stream_buffer.copy_from(m_persistent_stream_view); + m_gl_volatile_stream_buffer.copy_from(m_volatile_stream_view); m_vao.element_array_buffer = *m_index_ring_buffer; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 6aab3443be..e19a707942 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -279,6 +279,8 @@ private: gl::texture_cache m_gl_texture_cache; + gl::buffer_view m_persistent_stream_view; + gl::buffer_view m_volatile_stream_view; gl::texture m_gl_persistent_stream_buffer; gl::texture m_gl_volatile_stream_buffer; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index f4401aee75..d06d11528e 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -87,6 +87,7 @@ namespace gl bool initialized = false; bool vendor_INTEL = false; bool vendor_AMD = false; + bool vendor_NVIDIA = false; void initialize() { @@ -104,35 +105,35 @@ namespace gl if (ext_name == "GL_ARB_shader_draw_parameters") { ARB_shader_draw_parameters_supported = true; - find_count --; + find_count--; continue; } if (ext_name == "GL_EXT_direct_state_access") { EXT_dsa_supported = true; - find_count --; + find_count--; continue; } if (ext_name == "GL_ARB_direct_state_access") { ARB_dsa_supported = true; - find_count --; + find_count--; continue; } if 
(ext_name == "GL_ARB_buffer_storage") { ARB_buffer_storage_supported = true; - find_count --; + find_count--; continue; } if (ext_name == "GL_ARB_texture_buffer_object") { ARB_texture_buffer_supported = true; - find_count --; + find_count--; continue; } @@ -195,6 +196,10 @@ namespace gl if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT) EXT_dsa_supported = true; } + else if (vendor_string.find("nvidia") != std::string::npos) + { + vendor_NVIDIA = true; + } #ifdef _WIN32 else if (vendor_string.find("amd") != std::string::npos || vendor_string.find("ati") != std::string::npos) { @@ -864,7 +869,7 @@ namespace gl buffer::create(); glBindBuffer((GLenum)m_target, m_id); - glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_CLIENT_STORAGE_BIT | GL_MAP_COHERENT_BIT); + glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); m_memory_mapping = glMapBufferRange((GLenum)m_target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); verify(HERE), m_memory_mapping != nullptr; @@ -886,15 +891,19 @@ namespace gl if ((offset + alloc_size) > m_size) { if (!m_fence.is_empty()) + { m_fence.wait_for_signal(); + } + else + { + LOG_ERROR(RSX, "OOM Error: Ring buffer was likely being used without notify() being called"); + glFinish(); + } m_data_loc = 0; offset = 0; } - if (!m_data_loc) - m_fence.reset(); - //Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently m_data_loc = align(offset + alloc_size, 256); return std::make_pair(((char*)m_memory_mapping) + offset, offset); @@ -928,7 +937,8 @@ namespace gl //Notification of a draw command virtual void notify() { - if (m_fence.is_empty()) + //Insert fence about 25% into the buffer + if (m_fence.is_empty() && (m_data_loc > (m_size >> 2))) m_fence.reset(); } }; @@ -1046,6 +1056,69 @@ namespace gl void notify() override {} }; + class buffer_view + { + buffer* 
m_buffer = nullptr; + u32 m_offset = 0; + u32 m_range = 0; + GLenum m_format = GL_R8UI; + + public: + buffer_view(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI) + : m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format) + {} + + buffer_view() + {} + + void update(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI) + { + m_buffer = _buffer; + m_offset = offset; + m_range = range; + m_format = format; + } + + u32 offset() const + { + return m_offset; + } + + u32 range() const + { + return m_range; + } + + u32 format() const + { + return m_format; + } + + buffer* buffer() const + { + return m_buffer; + } + + bool in_range(u32 address, u32 size, u32& new_offset) const + { + if (address < m_offset) + return false; + + const u32 _offset = address - m_offset; + if (m_range < _offset) + return false; + + const auto remaining = m_range - _offset; + if (size <= remaining) + { + new_offset = _offset; + return true; + } + + return false; + } + }; + class vao { template @@ -1681,6 +1754,11 @@ namespace gl __glcheck glTextureBufferRange(id(), gl_format_type, buf.id(), offset, length); } + void copy_from(buffer_view &view) + { + copy_from(*view.buffer(), view.format(), view.offset(), view.range()); + } + void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) { buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf); @@ -2765,17 +2843,4 @@ namespace gl set_id(0); } }; - - class buffer_view : public buffer - { - public: - buffer_view(GLuint id) : buffer(id) - { - } - - ~buffer_view() - { - set_id(0); - } - }; } diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 43c9d3ad3f..ec7007888e 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -233,12 +233,32 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer() m_vertex_cache->store_range(storage_address, GL_R8UI, 
required.first, persistent_mapping.second); } } + + if (!m_persistent_stream_view.in_range(upload_info.persistent_mapping_offset, required.first, upload_info.persistent_mapping_offset)) + { + const size_t view_size = ((upload_info.persistent_mapping_offset + m_max_texbuffer_size) > m_attrib_ring_buffer->size()) ? + (m_attrib_ring_buffer->size() - upload_info.persistent_mapping_offset) : m_max_texbuffer_size; + + m_persistent_stream_view.update(m_attrib_ring_buffer.get(), upload_info.persistent_mapping_offset, (u32)view_size); + m_gl_persistent_stream_buffer.copy_from(m_persistent_stream_view); + upload_info.persistent_mapping_offset = 0; + } } if (required.second > 0) { volatile_mapping = m_attrib_ring_buffer->alloc_from_heap(required.second, m_min_texbuffer_alignment); upload_info.volatile_mapping_offset = volatile_mapping.second; + + if (!m_volatile_stream_view.in_range(upload_info.volatile_mapping_offset, required.second, upload_info.volatile_mapping_offset)) + { + const size_t view_size = ((upload_info.volatile_mapping_offset + m_max_texbuffer_size) > m_attrib_ring_buffer->size()) ? 
+ (m_attrib_ring_buffer->size() - upload_info.volatile_mapping_offset) : m_max_texbuffer_size; + + m_volatile_stream_view.update(m_attrib_ring_buffer.get(), upload_info.volatile_mapping_offset, (u32)view_size); + m_gl_volatile_stream_buffer.copy_from(m_volatile_stream_view); + upload_info.volatile_mapping_offset = 0; + } } //Write all the data diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 693970c9c3..797896733f 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -40,7 +40,7 @@ namespace vk //NOTE: Texture uploads can be huge, upto 16MB for a single texture (4096x4096px) #define VK_ATTRIB_RING_BUFFER_SIZE_M 256 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256 -#define VK_UBO_RING_BUFFER_SIZE_M 64 +#define VK_UBO_RING_BUFFER_SIZE_M 128 #define VK_INDEX_RING_BUFFER_SIZE_M 64 #define VK_MAX_ASYNC_CB_COUNT 64 diff --git a/rpcs3/rpcs3qt/gl_gs_frame.cpp b/rpcs3/rpcs3qt/gl_gs_frame.cpp index f69e1baebc..26ea770bfd 100644 --- a/rpcs3/rpcs3qt/gl_gs_frame.cpp +++ b/rpcs3/rpcs3qt/gl_gs_frame.cpp @@ -69,6 +69,5 @@ void gl_gs_frame::flip(draw_context_t context, bool skip_frame) //Do not swap buffers if frame skip is active if (skip_frame) return; - ((QOpenGLContext*)context)->makeCurrent(this); ((QOpenGLContext*)context)->swapBuffers(this); }