diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index 3963336099..7f73832364 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -28,7 +28,7 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R return false; if (binary1.data.size() != binary2.data.size()) return false; - if (binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs) + if (!binary1.skip_vertex_input_check && binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs) return false; const qword *instBuffer1 = (const qword*)binary1.data.data(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index dc4d5d8ecb..8ee853cd1d 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -89,7 +89,7 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + 512)); fill_scale_offset_data(mapped_buffer, true); fill_user_clip_data((char*)mapped_buffer + 64); - fill_fragment_state_buffer((char *)mapped_buffer + 128, m_fragment_program); + fill_fragment_state_buffer((char *)mapped_buffer + 128, current_fragment_program); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + 512)); D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { @@ -124,7 +124,7 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in D3D12_CONSTANT_BUFFER_VIEW_DESC D3D12GSRender::upload_fragment_shader_constants() { // Get constant from fragment program - size_t buffer_size = m_pso_cache.get_fragment_constants_buffer_size(m_fragment_program); + size_t buffer_size = m_pso_cache.get_fragment_constants_buffer_size(current_fragment_program); // Multiple of 256 never 0 buffer_size = (buffer_size + 255) & ~255; @@ -132,7 +132,7 @@ D3D12_CONSTANT_BUFFER_VIEW_DESC D3D12GSRender::upload_fragment_shader_constants( size_t offset = 0; float *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - m_pso_cache.fill_fragment_constants_buffer({ mapped_buffer, ::narrow(buffer_size) }, m_fragment_program); + m_pso_cache.fill_fragment_constants_buffer({ mapped_buffer, ::narrow(buffer_size) }, current_fragment_program); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); return { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index eacdb611ec..8e988602af 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -329,7 +329,7 @@ void D3D12GSRender::end() std::chrono::time_point program_load_end = steady_clock::now(); m_timers.program_load_duration += std::chrono::duration_cast(program_load_end - program_load_start).count(); - if (!m_fragment_program.valid) + if (!current_fragment_program.valid) { rsx::thread::end(); return; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 94f37968fb..80c86f3f07 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -61,8 +61,6 @@ private: data_cache m_texture_cache; bool invalidate_address(u32 addr); - RSXVertexProgram m_vertex_program; - RSXFragmentProgram m_fragment_program; PipelineStateObjectCache m_pso_cache; std::tuple, size_t, size_t> m_current_pso; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 8cbe35bbc3..c96e9ce0dc 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -53,10 +53,10 @@ void D3D12GSRender::load_program() return std::make_tuple(true, native_pitch); }; - m_vertex_program = get_current_vertex_program(); - m_fragment_program = get_current_fragment_program(rtt_lookup_func); + get_current_vertex_program(); + get_current_fragment_program(rtt_lookup_func); - if (!m_fragment_program.valid) + if (!current_fragment_program.valid) return; D3D12PipelineProperties prop = {}; @@ -308,12 +308,12 @@ void D3D12GSRender::load_program() } } - m_current_pso = m_pso_cache.getGraphicPipelineState(m_vertex_program, m_fragment_program, prop, m_device.Get(), m_shared_root_signature.Get()); + m_current_pso = m_pso_cache.getGraphicPipelineState(current_vertex_program, current_fragment_program, prop, m_device.Get(), m_shared_root_signature.Get()); return; } std::pair D3D12GSRender::get_programs() const { - return std::make_pair(m_pso_cache.get_transform_program(m_vertex_program).content, m_pso_cache.get_shader_program(m_fragment_program).content); + return std::make_pair(m_pso_cache.get_transform_program(current_vertex_program).content, m_pso_cache.get_shader_program(current_fragment_program).content); } #endif diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 4bbd320680..e90ec002cf 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -306,18 +306,12 @@ namespace void GLGSRender::end() { - std::chrono::time_point program_start = steady_clock::now(); - //Load program here since it is dependent on vertex state - - if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !load_program()) + if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state()) { rsx::thread::end(); return; } - std::chrono::time_point program_stop = steady_clock::now(); - m_begin_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); - if (manually_flush_ring_buffers) { //Use approximations to reseve space. This path is mostly for debug purposes anyway @@ -329,6 +323,32 @@ void GLGSRender::end() m_index_ring_buffer->reserve_storage_on_heap(16 * 1024); } + //Do vertex upload before RTT prep / texture lookups to give the driver time to push data + u32 vertex_draw_count; + u32 actual_vertex_count; + u32 vertex_base; + std::optional > indexed_draw_info; + std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer(); + + std::chrono::time_point program_start = steady_clock::now(); + //Load program here since it is dependent on vertex state + + load_program(vertex_base, actual_vertex_count); + + std::chrono::time_point program_stop = steady_clock::now(); + m_begin_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); + + if (manually_flush_ring_buffers) + { + m_attrib_ring_buffer->unmap(); + m_index_ring_buffer->unmap(); + } + else + { + //DMA push; not needed with MAP_COHERENT + //glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); + } + //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared auto copy_rtt_contents = [](gl::render_target *surface) @@ -470,20 +490,6 @@ void GLGSRender::end() std::chrono::time_point textures_end = steady_clock::now(); m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); - u32 vertex_draw_count = m_last_vertex_count; - std::optional > indexed_draw_info; - bool skip_upload = false; - - if (!is_probable_instanced_draw()) - { - std::tie(vertex_draw_count, indexed_draw_info) = set_vertex_buffer(); - m_last_vertex_count = vertex_draw_count; - } - else - { - skip_upload = true; - } - std::chrono::time_point draw_start = steady_clock::now(); if (g_cfg.video.debug_output) @@ -491,45 +497,31 @@ void GLGSRender::end() m_program->validate(); } - if (manually_flush_ring_buffers) + if (indexed_draw_info) { - m_attrib_ring_buffer->unmap(); - m_index_ring_buffer->unmap(); - } + const GLenum index_type = std::get<0>(indexed_draw_info.value()); + const u32 index_offset = std::get<1>(indexed_draw_info.value()); - if (indexed_draw_info || (skip_upload && m_last_draw_indexed == true)) - { if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) { - GLenum index_type = (skip_upload)? m_last_ib_type: std::get<0>(indexed_draw_info.value()); __glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); } - m_last_draw_indexed = true; - - if (!skip_upload) - { - m_last_ib_type = std::get<0>(indexed_draw_info.value()); - m_last_index_offset = std::get<1>(indexed_draw_info.value()); - } - - __glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, m_last_ib_type, (GLvoid *)(std::ptrdiff_t)m_last_index_offset); + __glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); } else { - draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count); - m_last_draw_indexed = false; + glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count); } m_attrib_ring_buffer->notify(); m_index_ring_buffer->notify(); - m_scale_offset_buffer->notify(); + m_vertex_state_buffer->notify(); m_fragment_constants_buffer->notify(); m_transform_constants_buffer->notify(); std::chrono::time_point draw_end = steady_clock::now(); m_draw_time += (u32)std::chrono::duration_cast(draw_end - draw_start).count(); - m_draw_calls++; if (zcull_task_queue.active_query && @@ -537,7 +529,6 @@ void GLGSRender::end() zcull_task_queue.active_query->num_draws++; synchronize_buffers(); - rsx::thread::end(); } @@ -620,13 +611,21 @@ void GLGSRender::on_init_thread() const u32 texture_index_offset = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count; - for (int index = 0; index < rsx::limits::vertex_count; ++index) + //Array stream buffer { - auto &tex = m_gl_attrib_buffers[index]; + auto &tex = m_gl_persistent_stream_buffer; tex.create(); tex.set_target(gl::texture::target::textureBuffer); + glActiveTexture(GL_TEXTURE0 + texture_index_offset); + tex.bind(); + } - glActiveTexture(GL_TEXTURE0 + texture_index_offset + index); + //Register stream buffer + { + auto &tex = m_gl_volatile_stream_buffer; + tex.create(); + tex.set_target(gl::texture::target::textureBuffer); + glActiveTexture(GL_TEXTURE0 + texture_index_offset + 1); tex.bind(); } @@ -645,7 +644,7 @@ void GLGSRender::on_init_thread() m_attrib_ring_buffer.reset(new gl::legacy_ring_buffer()); m_transform_constants_buffer.reset(new gl::legacy_ring_buffer()); m_fragment_constants_buffer.reset(new gl::legacy_ring_buffer()); - m_scale_offset_buffer.reset(new gl::legacy_ring_buffer()); + m_vertex_state_buffer.reset(new gl::legacy_ring_buffer()); m_index_ring_buffer.reset(new gl::legacy_ring_buffer()); } else @@ -653,7 +652,7 @@ void GLGSRender::on_init_thread() m_attrib_ring_buffer.reset(new gl::ring_buffer()); m_transform_constants_buffer.reset(new gl::ring_buffer()); m_fragment_constants_buffer.reset(new gl::ring_buffer()); - m_scale_offset_buffer.reset(new gl::ring_buffer()); + m_vertex_state_buffer.reset(new gl::ring_buffer()); m_index_ring_buffer.reset(new gl::ring_buffer()); } @@ -661,7 +660,7 @@ void GLGSRender::on_init_thread() m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000); m_transform_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); - m_scale_offset_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_vao.element_array_buffer = *m_index_ring_buffer; @@ -704,7 +703,7 @@ void GLGSRender::on_init_thread() void GLGSRender::on_exit() { - glDisable(GL_VERTEX_PROGRAM_POINT_SIZE); + glFinish(); m_prog_buffer.clear(); @@ -728,10 +727,8 @@ void GLGSRender::on_exit() m_vao.remove(); } - for (gl::texture &tex : m_gl_attrib_buffers) - { - tex.remove(); - } + m_gl_persistent_stream_buffer.remove(); + m_gl_volatile_stream_buffer.remove(); for (auto &sampler : m_gl_sampler_states) { @@ -753,9 +750,9 @@ void GLGSRender::on_exit() m_fragment_constants_buffer->remove(); } - if (m_scale_offset_buffer) + if (m_vertex_state_buffer) { - m_scale_offset_buffer->remove(); + m_vertex_state_buffer->remove(); } if (m_index_ring_buffer) @@ -865,7 +862,7 @@ bool GLGSRender::do_method(u32 cmd, u32 arg) return false; } -bool GLGSRender::load_program() +bool GLGSRender::check_program_state() { auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple { @@ -887,12 +884,19 @@ bool GLGSRender::load_program() return std::make_tuple(true, surface->get_native_pitch()); }; - RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func); - if (!fragment_program.valid) return false; + get_current_fragment_program(rtt_lookup_func); - RSXVertexProgram vertex_program = get_current_vertex_program(); + if (current_fragment_program.valid == false) + return false; - u32 unnormalized_rtts = 0; + get_current_vertex_program(); + return true; +} + +void GLGSRender::load_program(u32 vertex_base, u32 vertex_count) +{ + auto &fragment_program = current_fragment_program; + auto &vertex_program = current_vertex_program; for (auto &vtx : vertex_program.rsx_vertex_inputs) { @@ -906,12 +910,13 @@ bool GLGSRender::load_program() } } + vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side auto old_program = m_program; m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); m_program->use(); u8 *buf; - u32 scale_offset_offset; + u32 vertex_state_offset; u32 vertex_constants_offset; u32 fragment_constants_offset; @@ -920,17 +925,20 @@ bool GLGSRender::load_program() if (manually_flush_ring_buffers) { - m_scale_offset_buffer->reserve_storage_on_heap(512); + m_vertex_state_buffer->reserve_storage_on_heap(512); m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256)); if (m_transform_constants_dirty) m_transform_constants_buffer->reserve_storage_on_heap(8192); } - // Scale offset - auto mapping = m_scale_offset_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); + // Vertex state + auto mapping = m_vertex_state_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); buf = static_cast(mapping.first); - scale_offset_offset = mapping.second; + vertex_state_offset = mapping.second; fill_scale_offset_data(buf, false); - fill_user_clip_data((char *)buf + 64); + fill_user_clip_data(buf + 64); + *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); + *(reinterpret_cast(buf + 132)) = vertex_base; + fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast(buf + 144)); if (m_transform_constants_dirty) { @@ -939,7 +947,6 @@ bool GLGSRender::load_program() buf = static_cast(mapping.first); vertex_constants_offset = mapping.second; fill_vertex_program_constants_data(buf); - *(reinterpret_cast(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits(); } // Fragment constants @@ -952,21 +959,20 @@ bool GLGSRender::load_program() // Fragment state fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program); - m_scale_offset_buffer->bind_range(0, scale_offset_offset, 512); + m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512); m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); if (m_transform_constants_dirty) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192); if (manually_flush_ring_buffers) { - m_scale_offset_buffer->unmap(); + m_vertex_state_buffer->unmap(); m_fragment_constants_buffer->unmap(); if (m_transform_constants_dirty) m_transform_constants_buffer->unmap(); } m_transform_constants_dirty = false; - return true; } void GLGSRender::flip(int buffer) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index e1ab3f7b0d..39d9033921 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -50,12 +50,13 @@ private: gl::texture_cache m_gl_texture_cache; - gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count]; + gl::texture m_gl_persistent_stream_buffer; + gl::texture m_gl_volatile_stream_buffer; std::unique_ptr m_attrib_ring_buffer; std::unique_ptr m_fragment_constants_buffer; std::unique_ptr m_transform_constants_buffer; - std::unique_ptr m_scale_offset_buffer; + std::unique_ptr m_vertex_state_buffer; std::unique_ptr m_index_ring_buffer; u32 m_draw_calls = 0; @@ -83,11 +84,6 @@ private: bool flush_draw_buffers = false; - bool m_last_draw_indexed; - GLenum m_last_ib_type; - size_t m_last_index_offset; - u32 m_last_vertex_count; - public: gl::fbo draw_fbo; @@ -391,13 +387,16 @@ private: gl_state; // Return element to draw and in case of indexed draw index type and offset in index buffer - std::tuple > > set_vertex_buffer(); + std::tuple > > set_vertex_buffer(); + rsx::vertex_input_layout m_vertex_layout = {}; void clear_surface(u32 arg); + void init_buffers(bool skip_reading = false); + + bool check_program_state(); + void load_program(u32 vertex_base, u32 vertex_count); public: - bool load_program(); - void init_buffers(bool skip_reading = false); void read_buffers(); void write_buffers(); void set_viewport(); diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 8149b02a21..607908bfb4 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -154,8 +154,17 @@ namespace gl } //Workaround for intel drivers which have terrible capability reporting - std::string vendor_string = (const char*)glGetString(GL_VENDOR); - std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower); + std::string vendor_string; + if (const char* raw_string = (const char*)glGetString(GL_VENDOR)) + { + vendor_string = raw_string; + std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(), ::tolower); + } + else + { + LOG_ERROR(RSX, "Failed to get vendor string from driver. Are we missing a context?"); + vendor_string = "intel"; //lowest acceptable value + } if (vendor_string.find("intel") != std::string::npos) { @@ -826,7 +835,7 @@ namespace gl buffer::create(); glBindBuffer((GLenum)m_target, m_id); - glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_CLIENT_STORAGE_BIT | GL_MAP_COHERENT_BIT); m_memory_mapping = glMapBufferRange((GLenum)m_target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); verify(HERE), m_memory_mapping != nullptr; @@ -2538,7 +2547,7 @@ namespace gl error_msg = buf.get(); } - throw validation_exception(error_msg); + LOG_ERROR(RSX, "Validation failed: %s", error_msg.c_str()); } } diff --git a/rpcs3/Emu/RSX/GL/GLProgramBuffer.h b/rpcs3/Emu/RSX/GL/GLProgramBuffer.h index 9f9eec53ad..b9560a0952 100644 --- a/rpcs3/Emu/RSX/GL/GLProgramBuffer.h +++ b/rpcs3/Emu/RSX/GL/GLProgramBuffer.h @@ -54,24 +54,11 @@ struct GLTraits result.uniforms[location] = (i + rsx::limits::fragment_textures_count); } - //We use texture buffers for vertex attributes. Bind these here as well - //as they are guaranteed to be fixed (1 to 1 mapping) - std::array s_reg_table = - { - "in_pos_buffer", "in_weight_buffer", "in_normal_buffer", - "in_diff_color_buffer", "in_spec_color_buffer", - "in_fog_buffer", - "in_point_size_buffer", "in_7_buffer", - "in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer", - "in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer" - }; + const int stream_buffer_start = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count; - for (int i = 0; i < rsx::limits::vertex_count; ++i) - { - int location; - if (result.uniforms.has_location(s_reg_table[i], &location)) - result.uniforms[location] = (i + rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count); - } + //Bind locations 0 and 1 to the stream buffers + result.uniforms[0] = stream_buffer_start; + result.uniforms[1] = stream_buffer_start + 1; LOG_NOTICE(RSX, "*** prog id = %d", result.id()); LOG_NOTICE(RSX, "*** vp id = %d", vertexProgramData.id); diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 3681a68092..353b9941af 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -19,130 +19,6 @@ namespace namespace { - u32 to_gl_internal_type(rsx::vertex_base_type type, u8 size) - { - /** - * NOTE 1. The buffer texture spec only allows fetches aligned to 8, 16, 32, etc... - * This rules out most 3-component formats, except for the 32-wide RGB32F, RGB32I, RGB32UI - * - * NOTE 2. While s1 & cmp types are signed normalized 16-bit integers, some GPU vendors dont support texture buffer access - * using these formats. Pass a 16 bit unnormalized integer and convert it in the vertex shader - */ - const u32 vec1_types[] = { GL_R16I, GL_R32F, GL_R16F, GL_R8, GL_R16I, GL_RGBA16I, GL_R8UI }; - const u32 vec2_types[] = { GL_RG16I, GL_RG32F, GL_RG16F, GL_RG8, GL_RG16I, GL_RGBA16I, GL_RG8UI }; - const u32 vec3_types[] = { GL_RGBA16I, GL_RGB32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16I, GL_RGBA8UI }; //VEC3 COMPONENTS NOT SUPPORTED! - const u32 vec4_types[] = { GL_RGBA16I, GL_RGBA32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16I, GL_RGBA8UI }; - - const u32* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types }; - - if (type > rsx::vertex_base_type::ub256) - fmt::throw_exception("OpenGL error: unknown vertex base type 0x%x" HERE, (u32)type); - - return vec_selectors[size][(int)type]; - } - - void prepare_buffer_for_writing(void *data, rsx::vertex_base_type type, u8 vertex_size, u32 vertex_count) - { - switch (type) - { - case rsx::vertex_base_type::sf: - { - if (vertex_size == 3) - { - /** - * Pad the 4th component for half-float arrays to 1, since texelfetch does not mask components - */ - u16 *dst = reinterpret_cast(data); - for (u32 i = 0, idx = 3; i < vertex_count; ++i, idx += 4) - dst[idx] = 0x3c00; - } - - break; - } - } - } - - template - struct apply_attrib_t; - - template - struct apply_attrib_t - { - static void func(gl::glsl::program& program, int location, const T* data) - { - program.attribs[location] = data[0]; - } - }; - - template - struct apply_attrib_t - { - static void func(gl::glsl::program& program, int location, const T* data) - { - program.attribs[location] = color2_base{ data[0], data[1] }; - } - }; - - template - struct apply_attrib_t - { - static void func(gl::glsl::program& program, int location, const T* data) - { - program.attribs[location] = color3_base{ data[0], data[1], data[2] }; - } - }; - - template - struct apply_attrib_t - { - static void func(gl::glsl::program& program, int location, const T* data) - { - program.attribs[location] = color4_base{ data[0], data[1], data[2], data[3] }; - } - }; - - - template - void apply_attrib_array(gl::glsl::program& program, int location, const std::vector& data) - { - for (size_t offset = 0; offset < data.size(); offset += count * sizeof(T)) - { - apply_attrib_t::func(program, location, (T*)(data.data() + offset)); - } - } - - gl::buffer_pointer::type gl_types(rsx::vertex_base_type type) - { - switch (type) - { - case rsx::vertex_base_type::s1: return gl::buffer_pointer::type::s16; - case rsx::vertex_base_type::f: return gl::buffer_pointer::type::f32; - case rsx::vertex_base_type::sf: return gl::buffer_pointer::type::f16; - case rsx::vertex_base_type::ub: return gl::buffer_pointer::type::u8; - case rsx::vertex_base_type::s32k: return gl::buffer_pointer::type::s32; - case rsx::vertex_base_type::cmp: return gl::buffer_pointer::type::s16; // Needs conversion - case rsx::vertex_base_type::ub256: gl::buffer_pointer::type::u8; - } - fmt::throw_exception("unknown vertex type" HERE); - } - - bool gl_normalized(rsx::vertex_base_type type) - { - switch (type) - { - case rsx::vertex_base_type::s1: - case rsx::vertex_base_type::ub: - case rsx::vertex_base_type::cmp: - return true; - case rsx::vertex_base_type::f: - case rsx::vertex_base_type::sf: - case rsx::vertex_base_type::ub256: - case rsx::vertex_base_type::s32k: - return false; - } - fmt::throw_exception("unknown vertex type" HERE); - } - // return vertex count if primitive type is not native (empty array otherwise) std::tuple get_index_array_for_emulated_non_indexed_draw(const std::vector> &first_count_commands, rsx::primitive_type primitive_mode, gl::ring_buffer &dst) { @@ -195,85 +71,13 @@ namespace throw; } - - struct vertex_buffer_visitor + struct vertex_input_state { - vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset, gl::vertex_cache* vertex_cache) - : vertex_count(vtx_cnt) - , m_attrib_ring_info(heap) - , m_program(prog) - , m_gl_attrib_buffers(attrib_buffer) - , m_min_texbuffer_alignment(min_texbuffer_offset) - , m_vertex_cache(vertex_cache) - { - } - - void operator()(const rsx::vertex_array_buffer& vertex_array) - { - int location; - if (!m_program->uniforms.has_location(s_reg_table[vertex_array.index], &location)) - return; - - GLenum gl_type = to_gl_internal_type(vertex_array.type, vertex_array.attribute_size); - auto& texture = m_gl_attrib_buffers[vertex_array.index]; - const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); - const u32 data_size = vertex_count * element_size; - - const uintptr_t local_addr = (uintptr_t)vertex_array.data.data(); - u32 buffer_offset = 0; - - if (auto uploaded = m_vertex_cache->find_vertex_range(local_addr, gl_type, data_size)) - { - buffer_offset = uploaded->offset_in_heap; - } - else - { - // Fill vertex_array - auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment); - gsl::byte* dst = static_cast(mapping.first); - buffer_offset = mapping.second; - gsl::span dest_span(dst, data_size); - - m_vertex_cache->store_range(local_addr, gl_type, data_size, buffer_offset); - write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size)); - prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count); - } - - texture.copy_from(m_attrib_ring_info, gl_type, buffer_offset, data_size); - } - - void operator()(const rsx::vertex_array_register& vertex_register) - { - int location; - if (!m_program->uniforms.has_location(s_reg_table[vertex_register.index], &location)) - return; - - const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size); - const u32 gl_type = to_gl_internal_type(vertex_register.type, vertex_register.attribute_size); - const u32 data_size = element_size; - - auto& texture = m_gl_attrib_buffers[vertex_register.index]; - - auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment); - u8 *dst = static_cast(mapping.first); - - memcpy(dst, vertex_register.data.data(), element_size); - prepare_buffer_for_writing(dst, vertex_register.type, vertex_register.attribute_size, vertex_count); - - texture.copy_from(m_attrib_ring_info, gl_type, mapping.second, data_size); - } - - void operator()(const rsx::empty_vertex_array& vbo) - { - } - - protected: - u32 vertex_count; - gl::ring_buffer& m_attrib_ring_info; - gl::glsl::program* m_program; - gl::texture* m_gl_attrib_buffers; - GLint m_min_texbuffer_alignment; - gl::vertex_cache* m_vertex_cache; + u32 vertex_draw_count; + u32 allocated_vertex_count; + u32 vertex_data_base; + u32 vertex_index_base; + std::optional> index_info; }; struct draw_command_visitor @@ -281,54 +85,32 @@ namespace using attribute_storage = std::vector< std::variant>; - draw_command_visitor(gl::ring_buffer& index_ring_buffer, gl::ring_buffer& attrib_ring_buffer, - gl::texture* gl_attrib_buffers, gl::glsl::program* program, GLint min_texbuffer_alignment, - gl::vertex_cache* vertex_cache, - std::function>)> gvb) + draw_command_visitor(gl::ring_buffer& index_ring_buffer, rsx::vertex_input_layout& vertex_layout) : m_index_ring_buffer(index_ring_buffer) - , m_attrib_ring_buffer(attrib_ring_buffer) - , m_gl_attrib_buffers(gl_attrib_buffers) - , m_program(program) - , m_min_texbuffer_alignment(min_texbuffer_alignment) - , get_vertex_buffers(gvb) - , m_vertex_cache(vertex_cache) - { - for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { - if (rsx::method_registers.vertex_arrays_info[index].size() || - rsx::method_registers.register_vertex_info[index].size) - { - max_vertex_attrib_size += 16; - } - } - } + , m_vertex_layout(vertex_layout) + {} - std::tuple>> operator()( - const rsx::draw_array_command& command) + vertex_input_state operator()(const rsx::draw_array_command& command) { u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; u32 max_index = vertex_count - 1 + min_index; - if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { + if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) + { u32 index_count; u32 offset_in_index_buffer; std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( rsx::method_registers.current_draw_clause.first_count_commands, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); - upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size); - - return std::make_tuple(index_count, - std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer)); + return{ index_count, vertex_count, min_index, 0, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer) }; } - upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size); - - return std::make_tuple(vertex_count, std::optional>()); + return{ vertex_count, vertex_count, min_index, 0, std::optional>() }; } - std::tuple>> operator()( - const rsx::draw_indexed_array_command& command) + vertex_input_state operator()(const rsx::draw_indexed_array_command& command) { u32 min_index = 0, max_index = 0; @@ -338,7 +120,7 @@ namespace u32 type_size = ::narrow(get_index_type_size(type)); - u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); + const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); u32 index_count = vertex_count; if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) @@ -352,129 +134,114 @@ namespace std::tie(min_index, max_index, index_count) = upload_index_buffer( command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.first_count_commands, vertex_count); - - upload_vertex_buffers(0, max_index, max_vertex_attrib_size); - return std::make_tuple(index_count, std::make_tuple(get_index_type(type), offset_in_index_buffer)); + //check for vertex arrays with frquency modifiers + for (auto &block : m_vertex_layout.interleaved_blocks) + { + if (block.min_divisor > 1) + { + //Ignore base offsets and return real results + //The upload function will optimize the uploaded range anyway + return{ index_count, max_index, 0, 0, std::make_tuple(get_index_type(type), offset_in_index_buffer) }; + } + } + + //Prefer only reading the vertices that are referenced in the index buffer itself + //Offset data source by min_index verts, but also notify the shader to offset the vertexID + return{ index_count, (max_index - min_index + 1), min_index, min_index, std::make_tuple(get_index_type(type), offset_in_index_buffer) }; } - std::tuple>> operator()( - const rsx::draw_inlined_array& command) + vertex_input_state operator()(const rsx::draw_inlined_array& command) { - // We need to go through array to determine vertex count so upload it - u32 vertex_count = upload_inline_array(max_vertex_attrib_size); + u32 vertex_count = (u32)command.inline_vertex_array.size() * sizeof(u32) / m_vertex_layout.interleaved_blocks[0].attribute_stride; - if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { + if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) + { u32 offset_in_index_buffer; u32 index_count; std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( { std::make_pair(0, vertex_count) }, - rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); - return std::make_tuple(index_count, - std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer)); + rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); + + return{ index_count, vertex_count, 0, 0, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer) }; } - return std::make_tuple(vertex_count, std::optional>()); + + return{ vertex_count, vertex_count, 0, 0, std::optional>() }; } private: - u32 max_vertex_attrib_size = 0; gl::ring_buffer& m_index_ring_buffer; - gl::ring_buffer& m_attrib_ring_buffer; - gl::texture* m_gl_attrib_buffers; - gl::vertex_cache* m_vertex_cache; - gl::glsl::program* m_program; - GLint m_min_texbuffer_alignment; - std::function>)> - get_vertex_buffers; - - void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size) - { - u32 verts_allocated = max_index - min_index + 1; - - vertex_buffer_visitor visitor(verts_allocated, m_attrib_ring_buffer, - m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment, m_vertex_cache); - const auto& vertex_buffers = - get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}}); - for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo); - } - - u32 upload_inline_array(const u32& max_vertex_attrib_size) - { - u32 stride = 0; - u32 offsets[rsx::limits::vertex_count] = {0}; - - for (u32 i = 0; i < rsx::limits::vertex_count; ++i) { - const auto& info = rsx::method_registers.vertex_arrays_info[i]; - if (!info.size()) continue; - - offsets[i] = stride; - stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - - u32 vertex_draw_count = - (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) / - stride; - - for (int index = 0; index < rsx::limits::vertex_count; ++index) { - auto& vertex_info = rsx::method_registers.vertex_arrays_info[index]; - - int location; - if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue; - - if (!vertex_info.size()) - continue; - - const u32 element_size = - rsx::get_vertex_type_size_on_host(vertex_info.type(), vertex_info.size()); - u32 data_size = element_size * vertex_draw_count; - u32 gl_type = to_gl_internal_type(vertex_info.type(), vertex_info.size()); - - auto& texture = m_gl_attrib_buffers[index]; - - u8* src = - reinterpret_cast(rsx::method_registers.current_draw_clause.inline_vertex_array.data()); - auto mapping = m_attrib_ring_buffer.alloc_from_heap(data_size, m_min_texbuffer_alignment); - u8* dst = static_cast(mapping.first); - - src += offsets[index]; - prepare_buffer_for_writing(dst, vertex_info.type(), vertex_info.size(), vertex_draw_count); - - // TODO: properly handle compressed data - for (u32 i = 0; i < vertex_draw_count; ++i) { - if (vertex_info.type() == rsx::vertex_base_type::ub && vertex_info.size() == 4) { - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; - } - else - memcpy(dst, src, element_size); - - src += stride; - dst += element_size; - } - - texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size); - } - return vertex_draw_count; - } + rsx::vertex_input_layout& m_vertex_layout; }; } -std::tuple>> GLGSRender::set_vertex_buffer() +std::tuple>> GLGSRender::set_vertex_buffer() { std::chrono::time_point then = steady_clock::now(); - auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer, - m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment, - m_vertex_cache.get(), - [this](const auto& state, const auto& list) { - return this->get_vertex_buffers(state, list, 0); - }), - get_draw_command(rsx::method_registers)); + + m_vertex_layout = analyse_inputs_interleaved(); + + //Write index buffers and count verts + auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers)); + + auto &vertex_count = result.allocated_vertex_count; + auto &vertex_base = result.vertex_data_base; + + //Do actual vertex upload + auto &required = calculate_memory_requirements(m_vertex_layout, vertex_count); + + + std::pair persistent_mapping = {}, volatile_mapping = {}; + + if (required.first > 0) + { + //Check if cacheable + //Only data in the 'persistent' block may be cached + //TODO: make vertex cache keep local data beyond frame boundaries and hook notify command + bool in_cache = false; + bool to_store = false; + u32 storage_address = UINT32_MAX; + + if (m_vertex_layout.interleaved_blocks.size() == 1 && + rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array) + { + storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base; + if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first)) + { + in_cache = true; + m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, cached->offset_in_heap, required.first); + } + else + { + to_store = true; + } + } + + if (!in_cache) + { + persistent_mapping = m_attrib_ring_buffer->alloc_from_heap(required.first, m_min_texbuffer_alignment); + m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, persistent_mapping.second, required.first); + + if (to_store) + { + //store ref in vertex cache + m_vertex_cache->store_range(storage_address, GL_R8UI, required.first, persistent_mapping.second); + } + } + } + + if (required.second > 0) + { + volatile_mapping = m_attrib_ring_buffer->alloc_from_heap(required.second, m_min_texbuffer_alignment); + m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, volatile_mapping.second, required.second); + } + + //Write all the data + write_vertex_data_to_memory(m_vertex_layout, vertex_base, vertex_count, persistent_mapping.first, volatile_mapping.first); std::chrono::time_point now = steady_clock::now(); m_vertex_upload_time += std::chrono::duration_cast(now - then).count(); - return result; + return std::make_tuple(result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info); } namespace diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 3d3592cdf2..cf2cc39250 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -31,57 +31,21 @@ std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::stri void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) { OS << "#version 430\n\n"; - OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer\n"; + OS << "layout(std140, binding = 0) uniform VertexContextBuffer\n"; OS << "{\n"; - OS << " mat4 scaleOffsetMat;\n"; - OS << " ivec4 userClipEnabled[2];\n"; - OS << " vec4 userClipFactor[2];\n"; - OS << "};\n"; + OS << " mat4 scale_offset_mat;\n"; + OS << " ivec4 user_clip_enabled[2];\n"; + OS << " vec4 user_clip_factor[2];\n"; + OS << " uint transform_branch_bits;\n"; + OS << " uint vertex_base_index;\n"; + OS << " ivec4 input_attributes[16];\n"; + OS << "};\n\n"; } void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector& inputs) { - std::vector> input_data; - for (const ParamType &PT : inputs) - { - for (const ParamItem &PI : PT.items) - { - input_data.push_back(std::make_tuple(PI.location, PI.name)); - } - } - - /** - * Its is important that the locations are in the order that vertex attributes are expected. - * If order is not adhered to, channels may be swapped leading to corruption - */ - - std::sort(input_data.begin(), input_data.end()); - - int location = 1; - for (const std::tuple& item : input_data) - { - for (const ParamType &PT : inputs) - { - for (const ParamItem &PI : PT.items) - { - if (PI.name == std::get<1>(item)) - { - bool is_int = false; - for (const auto &attrib : rsx_vertex_program.rsx_vertex_inputs) - { - if (attrib.location == std::get<0>(item)) - { - if (attrib.int_type || attrib.flags & GL_VP_SINT_MASK) is_int = true; - break; - } - } - - std::string samplerType = is_int ? "isamplerBuffer" : "samplerBuffer"; - OS << "layout(location=" << location++ << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n"; - } - } - } - } + OS << "layout(location=0) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable) + OS << "layout(location=1) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data) } void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) @@ -89,7 +53,6 @@ void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std OS << "layout(std140, binding = 1) uniform VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 vc[468];\n"; - OS << " uint transform_branch_bits;\n"; OS << "};\n\n"; for (const ParamType &PT: constants) @@ -115,13 +78,13 @@ static const vertex_reg_info reg_table[] = //Fog output shares a data source register with clip planes 0-2 so only declare when specified { "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG }, //Warning: Always define all 3 clip plane groups together to avoid flickering with openGL - { "gl_ClipDistance[0]", false, "dst_reg5", ".y * userClipFactor[0].x", false, "userClipEnabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, - { "gl_ClipDistance[1]", false, "dst_reg5", ".z * userClipFactor[0].y", false, "userClipEnabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, - { "gl_ClipDistance[2]", false, "dst_reg5", ".w * userClipFactor[0].z", false, "userClipEnabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, + { "gl_ClipDistance[0]", false, "dst_reg5", ".y * user_clip_factor[0].x", false, "user_clip_enabled[0].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, + { "gl_ClipDistance[1]", false, "dst_reg5", ".z * user_clip_factor[0].y", false, "user_clip_enabled[0].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, + { "gl_ClipDistance[2]", false, "dst_reg5", ".w * user_clip_factor[0].z", false, "user_clip_enabled[0].z > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC0 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC1 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC2 }, { "gl_PointSize", false, "dst_reg6", ".x", false }, - { "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, - { "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, - { "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, + { "gl_ClipDistance[3]", false, "dst_reg6", ".y * user_clip_factor[0].w", false, "user_clip_enabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, + { "gl_ClipDistance[4]", false, "dst_reg6", ".z * user_clip_factor[1].x", false, "user_clip_enabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, + { "gl_ClipDistance[5]", false, "dst_reg6", ".w * user_clip_factor[1].y", false, "user_clip_enabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 | CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, { "tc0", true, "dst_reg7", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 }, { "tc1", true, "dst_reg8", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 }, { "tc2", true, "dst_reg9", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 }, @@ -206,57 +169,175 @@ namespace } } - void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector &inputs) + void insert_vertex_input_fetch(std::stringstream& OS) { - for (const auto &real_input : inputs) - { - if (real_input.location != PI.location) - continue; + //Actually decode a vertex attribute from a raw byte stream + OS << "struct attribute_desc\n"; + OS << "{\n"; + OS << " int type;\n"; + OS << " int attribute_size;\n"; + OS << " int starting_offset;\n"; + OS << " int stride;\n"; + OS << " int swap_bytes;\n"; + OS << " int is_volatile;\n"; + OS << " int frequency;\n"; + OS << " int divisor;\n"; + OS << " int modulo;\n"; + OS << "};\n\n"; - std::string vecType = " vec4 "; - if (real_input.int_type) - vecType = " ivec4 "; + OS << "uint get_bits(uvec4 v, int swap)\n"; + OS << "{\n"; + OS << " if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);\n"; + OS << " return (v.x | v.y << 8 | v.z << 16 | v.w << 24);\n"; + OS << "}\n\n"; - std::string scale = ""; - if (real_input.flags & GL_VP_SINT_MASK) - { - if (real_input.flags & GL_VP_ATTRIB_S16_INT) - scale = " / " + expand_to_vec4("32767.", real_input.size); - else - scale = " / " + expand_to_vec4("2147483647.", real_input.size); - } + OS << "uint get_bits(uvec2 v, int swap)\n"; + OS << "{\n"; + OS << " if (swap != 0) return (v.y | v.x << 8);\n"; + OS << " return (v.x | v.y << 8);\n"; + OS << "}\n\n"; - if (!real_input.is_array) - { - OS << vecType << PI.name << " = texelFetch(" << PI.name << "_buffer, 0)" << scale << ";\n"; - return; - } + OS << "int preserve_sign_s16(uint bits)\n"; + OS << "{\n"; + OS << " //convert raw 16 bit value into signed 32-bit integer counterpart\n"; + OS << " uint sign = bits & 0x8000;\n"; + OS << " if (sign != 0) return int(bits | 0xFFFF0000);\n"; + OS << " return int(bits);\n"; + OS << "}\n\n"; - if (real_input.frequency > 1) - { - if (real_input.is_modulo) - { - OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID %" << real_input.frequency << ")" << scale << ";\n"; - return; - } + OS << "float convert_to_f32(uint bits)\n"; + OS << "{\n"; + OS << " uint sign = (bits >> 31) & 1;\n"; + OS << " uint exp = (bits >> 23) & 0xff;\n"; + OS << " uint mantissa = bits & 0x7fffff;\n"; + OS << " float base = (sign != 0)? -1.f: 1.f;\n"; + OS << " base *= exp2(exp - 127);\n"; + OS << " float scale = 0.f;\n\n"; + OS << " for (int x = 0; x < 23; x++)\n"; + OS << " {\n"; + OS << " int inv = (22 - x);\n"; + OS << " if ((mantissa & (1 << inv)) == 0) continue;\n"; + OS << " scale += 1.f / pow(2.f, float(inv));\n"; + OS << " }\n"; + OS << " return base * scale;\n"; + OS << "}\n"; - OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID /" << real_input.frequency << ")" << scale << ";\n"; - return; - } + OS << "#define get_s16(v, s) preserve_sign_s16(get_bits(v, s))\n\n"; - OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID)" << scale << ";\n"; - return; - } + OS << "vec4 fetch_attribute(attribute_desc desc, int vertex_id, usamplerBuffer input_stream)\n"; + OS << "{\n"; + OS << " vec4 result = vec4(0., 0., 0., 1.);\n"; + OS << " vec4 scale = vec4(1.);\n"; + OS << " uvec4 tmp;\n"; + OS << " uint bits;\n"; + OS << " bool reverse_order = false;\n"; + OS << "\n"; + OS << " int first_byte = (vertex_id * desc.stride) + desc.starting_offset;\n"; + OS << " for (int n = 0; n < desc.attribute_size; n++)\n"; + OS << " {\n"; + OS << " switch (desc.type)\n"; + OS << " {\n"; + OS << " case 0:\n"; + OS << " //signed normalized 16-bit\n"; + OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n"; + OS << " scale[n] = 32767.;\n"; + OS << " break;\n"; + OS << " case 1:\n"; + OS << " //float\n"; + OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));\n"; + OS << " break;\n"; + OS << " case 2:\n"; + OS << " //half\n"; + OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " result[n] = unpackHalf2x16(uint(get_bits(tmp.xy, desc.swap_bytes))).x;\n"; + OS << " break;\n"; + OS << " case 3:\n"; + OS << " //unsigned byte\n"; + OS << " result[n] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " scale[n] = 255.;\n"; + OS << " reverse_order = (desc.swap_bytes != 0);\n"; + OS << " break;\n"; + OS << " case 4:\n"; + OS << " //signed word\n"; + OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n"; + OS << " break;\n"; + OS << " case 5:\n"; + OS << " //cmp\n"; + OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " bits = get_bits(tmp, desc.swap_bytes);\n"; + OS << " result.x = preserve_sign_s16((bits & 0x7FF) << 5);\n"; + OS << " result.y = preserve_sign_s16(((bits >> 11) & 0x7FF) << 5);\n"; + OS << " result.z = preserve_sign_s16(((bits >> 22) & 0x3FF) << 6);\n"; + OS << " result.w = 1.;\n"; + OS << " scale = vec4(32767., 32767., 32767., 1.);\n"; + OS << " break;\n"; + OS << " case 6:\n"; + OS << " //ub256\n"; + OS << " result[n] = float(texelFetch(input_stream, first_byte++).x);\n"; + OS << " reverse_order = (desc.swap_bytes != 0);\n"; + OS << " break;\n"; + OS << " }\n"; + OS << " }\n\n"; + OS << " result /= scale;\n"; + OS << " return (reverse_order)? result.wzyx: result;\n"; + OS << "}\n\n"; - LOG_WARNING(RSX, "Vertex input %s does not have a matching vertex_input declaration", PI.name.c_str()); + OS << "attribute_desc fetch_desc(int location)\n"; + OS << "{\n"; + OS << " attribute_desc result;\n"; + OS << " int attribute_flags = input_attributes[location].w;\n"; + OS << " result.type = input_attributes[location].x;\n"; + OS << " result.attribute_size = input_attributes[location].y;\n"; + OS << " result.starting_offset = input_attributes[location].z;\n"; + OS << " result.stride = attribute_flags & 0xFF;\n"; + OS << " result.swap_bytes = (attribute_flags >> 8) & 0x1;\n"; + OS << " result.is_volatile = (attribute_flags >> 9) & 0x1;\n"; + OS << " result.frequency = (attribute_flags >> 10) & 0x3;\n"; + OS << " result.modulo = (attribute_flags >> 12) & 0x1;\n"; + OS << " result.divisor = (attribute_flags >> 13) & 0xFFFF;\n"; + OS << " return result;\n"; + OS << "}\n\n"; - OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID);\n"; + OS << "vec4 read_location(int location)\n"; + OS << "{\n"; + OS << " attribute_desc desc = fetch_desc(location);\n"; + OS << "\n"; + OS << " int vertex_id = gl_VertexID - int(vertex_base_index);\n"; + OS << " if (desc.frequency == 0)\n"; + OS << " vertex_id = 0;\n"; + OS << " else if (desc.frequency > 1)\n"; + OS << " {\n"; + OS << " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"; + OS << " if (desc.modulo != 0)\n"; + OS << " vertex_id = gl_VertexID % desc.divisor;\n"; + OS << " else\n"; + OS << " vertex_id = gl_VertexID / desc.divisor;\n"; + OS << " }\n"; + OS << "\n"; + OS << " if (desc.is_volatile != 0)\n"; + OS << " return fetch_attribute(desc, vertex_id, volatile_input_stream);\n"; + OS << " else\n"; + OS << " return fetch_attribute(desc, vertex_id, persistent_input_stream);\n"; + OS << "}\n\n"; } } void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) { insert_glsl_legacy_function(OS, gl::glsl::glsl_vertex_program); + insert_vertex_input_fetch(OS); std::string parameters = ""; for (int i = 0; i < 16; ++i) @@ -293,7 +374,9 @@ void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) for (const ParamType &PT : m_parr.params[PF_PARAM_IN]) { for (const ParamItem &PI : PT.items) - add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs); + { + OS << " vec4 " << PI.name << "= read_location(" << std::to_string(PI.location) << ");\n"; + } } for (const ParamType &PT : m_parr.params[PF_PARAM_UNIFORM]) @@ -401,7 +484,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS) if (m_parr.HasParam(PF_PARAM_NONE, "vec4", "dst_reg2")) OS << " front_spec_color = dst_reg2;\n"; - OS << " gl_Position = gl_Position * scaleOffsetMat;\n"; + OS << " gl_Position = gl_Position * scale_offset_mat;\n"; //Since our clip_space is symetrical [-1, 1] we map it to linear space using the eqn: //ln = (clip * 2) - 1 to fully utilize the 0-1 range of the depth buffer diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.h b/rpcs3/Emu/RSX/GL/GLVertexProgram.h index 29ade83e5f..25d01d2da6 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.h +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.h @@ -30,6 +30,7 @@ protected: virtual void insertMainEnd(std::stringstream &OS) override; const RSXVertexProgram &rsx_vertex_program; + std::unordered_map input_locations; public: GLVertexDecompilerThread(const RSXVertexProgram &prog, std::string& shader, ParamArray&) : VertexProgramDecompiler(prog) @@ -50,6 +51,7 @@ public: ParamArray parr; u32 id = 0; std::string shader; + bool interleaved; void Decompile(const RSXVertexProgram& prog); void Compile(); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index d4b1362aa0..b4663e4df5 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -125,7 +125,7 @@ namespace rsx return sizeof(u8) * 4; } fmt::throw_exception("Wrong vector size" HERE); - case vertex_base_type::cmp: return sizeof(u16) * 4; + case vertex_base_type::cmp: return 4; case vertex_base_type::ub256: verify(HERE), (size == 4); return sizeof(u8) * 4; } fmt::throw_exception("RSXVertexData::GetTypeSize: Bad vertex data type (%d)!" HERE, (u8)type); @@ -309,32 +309,6 @@ namespace rsx return (u32)element_push_buffer.size(); } - bool thread::is_probable_instanced_draw() - { - if (!g_cfg.video.batch_instanced_geometry) - return false; - - //If the array registers have not been touched, the index array has also not been touched via notify or via register set, its likely an instanced draw - //gcm lib will set the registers once and then call the same draw command over and over with different transform params to achieve this - if (m_index_buffer_changed || m_vertex_attribs_changed) - return false; - - auto& draw_clause = rsx::method_registers.current_draw_clause; - if (draw_clause.command != m_last_command) - return false; - - if (draw_clause.command != rsx::draw_command::inlined_array) - { - if (draw_clause.first_count_commands.back().second != m_last_first_count.second || - draw_clause.first_count_commands.front().first != m_last_first_count.first) - return false; - } - else if (m_last_first_count.second != draw_clause.inline_vertex_array.size()) - return false; - - return true; - } - void thread::end() { rsx::method_registers.transform_constants.clear(); @@ -355,17 +329,6 @@ namespace rsx u32 element_count = rsx::method_registers.current_draw_clause.get_elements_count(); capture_frame("Draw " + rsx::to_string(rsx::method_registers.current_draw_clause.primitive) + std::to_string(element_count)); } - - auto& clause = rsx::method_registers.current_draw_clause; - - m_last_command = clause.command; - if (m_last_command == rsx::draw_command::inlined_array) - m_last_first_count = std::make_pair(0, (u32)clause.inline_vertex_array.size()); - else - m_last_first_count = std::make_pair(clause.first_count_commands.front().first, clause.first_count_commands.back().second); - - m_index_buffer_changed = false; - m_vertex_attribs_changed = false; } void thread::on_task() @@ -543,20 +506,6 @@ namespace rsx m_vblank_thread->join(); m_vblank_thread.reset(); } - - if (m_vertex_streaming_task.available_threads > 0) - { - for (auto &task : m_vertex_streaming_task.worker_threads) - { - if (!task.worker_thread) - break; - - task.worker_thread->join(); - task.worker_thread.reset(); - } - - m_vertex_streaming_task.available_threads = 0; - } } std::string thread::get_name() const @@ -920,9 +869,10 @@ namespace rsx return rsx::get_address(offset_zeta, m_context_dma_z); } - RSXVertexProgram thread::get_current_vertex_program() const + void thread::get_current_vertex_program() { - RSXVertexProgram result = {}; + auto &result = current_vertex_program = {}; + const u32 transform_program_start = rsx::method_registers.transform_program_start(); result.data.reserve((512 - transform_program_start) * 4); result.rsx_vertex_inputs.reserve(rsx::limits::vertex_count); @@ -980,16 +930,144 @@ namespace rsx is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0}); } } + } + + vertex_input_layout thread::analyse_inputs_interleaved() const + { + const rsx_state& state = rsx::method_registers; + const u32 input_mask = state.vertex_attrib_input_mask(); + + if (state.current_draw_clause.command == rsx::draw_command::inlined_array) + { + vertex_input_layout result = {}; + + interleaved_range_info info = {}; + info.interleaved = true; + info.locations.reserve(8); + + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + const u32 mask = (1u << index); + auto &vinfo = state.vertex_arrays_info[index]; + + if (vinfo.size() > 0) + { + info.locations.push_back(index); + info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size()); + result.attribute_placement[index] = attribute_buffer_placement::transient; + } + } + + result.interleaved_blocks.push_back(info); + return result; + } + + const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask(); + vertex_input_layout result = {}; + + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + const bool enabled = !!(input_mask & (1 << index)); + if (!enabled) + continue; + + if (vertex_push_buffers[index].size > 0) + { + std::pair volatile_range_info = std::make_pair(index, vertex_push_buffers[index].data.size() * (u32)sizeof(u32)); + result.volatile_blocks.push_back(volatile_range_info); + result.attribute_placement[index] = attribute_buffer_placement::transient; + continue; + } + + //Check for interleaving + auto &info = state.vertex_arrays_info[index]; + if (info.size() == 0 && state.register_vertex_info[index].size > 0) + { + //Reads from register + result.referenced_registers.push_back(index); + result.attribute_placement[index] = attribute_buffer_placement::transient; + continue; + } + + if (info.size() > 0) + { + result.attribute_placement[index] = attribute_buffer_placement::persistent; + const u32 base_address = info.offset() & 0x7fffffff; + bool alloc_new_block = true; + + for (auto &block : result.interleaved_blocks) + { + if (block.attribute_stride != info.stride()) + { + //Stride does not match, continue + continue; + } + + if (base_address > block.base_offset) + { + const u32 diff = base_address - block.base_offset; + if (diff > info.stride()) + { + //Not interleaved, continue + continue; + } + } + else + { + const u32 diff = block.base_offset - base_address; + if (diff > info.stride()) + { + //Not interleaved, continue + continue; + } + + //Matches, and this address is lower than existing + block.base_offset = base_address; + } + + alloc_new_block = false; + block.locations.push_back(index); + block.interleaved = true; + block.min_divisor = std::min(block.min_divisor, info.frequency()); + + if (block.all_modulus) + block.all_modulus = !!(frequency_divider_mask & (1 << index)); + + break; + } + + if (alloc_new_block) + { + interleaved_range_info block = {}; + block.base_offset = base_address; + block.attribute_stride = info.stride(); + block.memory_location = info.offset() >> 31; + block.locations.reserve(4); + block.locations.push_back(index); + block.min_divisor = info.frequency(); + block.all_modulus = !!(frequency_divider_mask & (1 << index)); + + result.interleaved_blocks.push_back(block); + } + } + } + + for (auto &info : result.interleaved_blocks) + { + //Calculate real data address to be used during upload + info.real_offset_address = state.vertex_data_base_offset() + rsx::get_address(info.base_offset, info.memory_location); + } + return result; } - RSXFragmentProgram thread::get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) const + void thread::get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) { - RSXFragmentProgram result = {}; + auto &result = current_fragment_program = {}; const u32 shader_program = rsx::method_registers.shader_program_address(); if (shader_program == 0) - return result; + return; const u32 program_location = (shader_program & 0x3) - 1; const u32 program_offset = (shader_program & ~0x3); @@ -1064,8 +1142,6 @@ namespace rsx } result.set_texture_dimension(texture_dimensions); - - return result; } void thread::reset() @@ -1141,121 +1217,282 @@ namespace rsx } } - void thread::post_vertex_stream_to_upload(gsl::span src, gsl::span dst, rsx::vertex_base_type type, u32 vector_element_count, - u32 attribute_src_stride, u8 dst_stride, u32 vertex_count, std::function callback) + std::pair thread::calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count) { - upload_stream_packet packet; - packet.dst_span = dst; - packet.src_span = src; - packet.src_stride = attribute_src_stride; - packet.type = type; - packet.dst_stride = dst_stride; - packet.vector_width = vector_element_count; - packet.post_upload_func = callback; - packet.vertex_count = vertex_count; + u32 persistent_memory_size = 0; + u32 volatile_memory_size = 0; - if (m_vertex_streaming_task.available_threads == 0) + volatile_memory_size += (u32)layout.referenced_registers.size() * 16u; + + if (rsx::method_registers.current_draw_clause.is_immediate_draw) { - const u32 streaming_thread_count = (u32)g_cfg.video.vertex_upload_threads; - m_vertex_streaming_task.available_threads = streaming_thread_count; - - for (u32 n = 0; n < streaming_thread_count; ++n) + for (auto &info : layout.volatile_blocks) { - thread_ctrl::spawn(m_vertex_streaming_task.worker_threads[n].worker_thread, "Vertex Stream " + std::to_string(n), [this, n]() + volatile_memory_size += info.second; + } + } + else if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + for (auto &block : layout.interleaved_blocks) + { + volatile_memory_size += block.attribute_stride * vertex_count; + } + } + else + { + for (auto &block : layout.interleaved_blocks) + { + u32 unique_verts; + + if (block.min_divisor > 1) { - auto &owner = m_vertex_streaming_task; - auto &task = m_vertex_streaming_task.worker_threads[n]; - const u32 index = n; - - while (!Emu.IsStopped()) + if (block.all_modulus) + unique_verts = block.min_divisor; + else { - if (task.thread_status.load(std::memory_order_consume) != 0) - { - for (auto &packet: task.packets) - { - write_vertex_array_data_to_buffer(packet.dst_span, packet.src_span, packet.vertex_count, packet.type, packet.vector_width, packet.src_stride, packet.dst_stride); - - if (packet.post_upload_func) - packet.post_upload_func(packet.dst_span.data(), packet.type, (u8)packet.vector_width, packet.vertex_count); - - owner.remaining_tasks--; - } - - task.packets.resize(0); - task.thread_status.store(0); - _mm_sfence(); - } - - std::this_thread::yield(); + unique_verts = vertex_count / block.min_divisor; + if (vertex_count % block.min_divisor) unique_verts++; } - }); + } + else + unique_verts = vertex_count; + + persistent_memory_size += block.attribute_stride * unique_verts; } } - //Increment job counter.. - m_vertex_streaming_task.remaining_tasks++; + return std::make_pair(persistent_memory_size, volatile_memory_size); + } - //Assign this packet to a thread - //Simple round robin based on first available thread - upload_stream_worker *best_fit = nullptr; - for (auto &worker : m_vertex_streaming_task.worker_threads) + void thread::fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer) + { + std::array offset_in_block = {}; + u32 volatile_offset = 0; + u32 persistent_offset = 0; + + if (rsx::method_registers.current_draw_clause.is_immediate_draw) { - if (!worker.worker_thread) - break; - - if (worker.thread_status.load(std::memory_order_consume) == 0) + for (auto &info : layout.volatile_blocks) { - if (worker.packets.size() == 0) + offset_in_block[info.first] = volatile_offset; + volatile_offset += info.second; + } + } + + for (u8 index : layout.referenced_registers) + { + offset_in_block[index] = volatile_offset; + volatile_offset += 16; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + auto &block = layout.interleaved_blocks[0]; + for (u8 index : block.locations) + { + auto &info = rsx::method_registers.vertex_arrays_info[index]; + + offset_in_block[index] = persistent_offset; //just because this var is 0 when we enter here; inlined is transient memory + persistent_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size()); + } + } + else + { + for (auto &block : layout.interleaved_blocks) + { + for (u8 index : block.locations) { - worker.packets.push_back(packet); - return; + const u32 local_address = (rsx::method_registers.vertex_arrays_info[index].offset() & 0x7fffffff); + offset_in_block[index] = persistent_offset + (local_address - block.base_offset); } - if (best_fit == nullptr) - best_fit = &worker; - else if (best_fit->packets.size() > worker.packets.size()) - best_fit = &worker; + u32 unique_verts; + + if (block.min_divisor > 1) + { + if (block.all_modulus) + unique_verts = block.min_divisor; + else + { + unique_verts = vertex_count / block.min_divisor; + if (vertex_count % block.min_divisor) unique_verts++; + } + } + else + unique_verts = vertex_count; + + persistent_offset += block.attribute_stride * unique_verts; } } - best_fit->packets.push_back(packet); - } + //Fill the data + memset(buffer, 0, 256); - void thread::start_vertex_upload_task() - { - for (auto &worker : m_vertex_streaming_task.worker_threads) + const s32 swap_storage_mask = (1 << 8); + const s32 volatile_storage_mask = (1 << 9); + const s32 default_frequency_mask = (1 << 10); + const s32 repeating_frequency_mask = (3 << 10); + const s32 input_function_modulo_mask = (1 << 12); + const s32 input_divisor_mask = (0xFFFF << 13); + + const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); + + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { - if (!worker.worker_thread) - break; + if (layout.attribute_placement[index] == attribute_buffer_placement::none) + continue; - if (worker.thread_status.load(std::memory_order_consume) == 0 && worker.packets.size() > 0) + rsx::vertex_base_type type = {}; + s32 size = 0; + s32 attributes = 0; + + bool is_be_type = true; + + if (layout.attribute_placement[index] == attribute_buffer_placement::transient) { - worker.thread_status.store(1); + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + auto &info = rsx::method_registers.vertex_arrays_info[index]; + type = info.type(); + size = (s32)info.size(); + + attributes = layout.interleaved_blocks[0].attribute_stride; + attributes |= default_frequency_mask | volatile_storage_mask; + + is_be_type = false; + } + else if (rsx::method_registers.current_draw_clause.is_immediate_draw) + { + auto &info = rsx::method_registers.register_vertex_info[index]; + type = info.type; + size = (s32)info.size; + + attributes = rsx::get_vertex_type_size_on_host(type, size); + attributes |= default_frequency_mask | volatile_storage_mask; + + is_be_type = true; + } + else + { + //Register + auto& info = rsx::method_registers.register_vertex_info[index]; + type = info.type; + size = (s32)info.size; + + attributes = rsx::get_vertex_type_size_on_host(type, size); + attributes |= volatile_storage_mask; + + is_be_type = false; + } + } + else + { + auto &info = rsx::method_registers.vertex_arrays_info[index]; + type = info.type(); + size = info.size(); + + const u32 frequency = info.frequency(); + switch (frequency) + { + case 0: + case 1: + attributes |= default_frequency_mask; + break; + default: + { + if (modulo_mask & (1 << index)) + attributes |= input_function_modulo_mask; + + attributes |= repeating_frequency_mask; + attributes |= (frequency << 13) & input_divisor_mask; + } + } + + attributes |= info.stride(); + } //end attribute placement check + + switch (type) + { + case rsx::vertex_base_type::cmp: + size = 1; + //fall through + default: + if (is_be_type) attributes |= swap_storage_mask; + break; + case rsx::vertex_base_type::ub: + case rsx::vertex_base_type::ub256: + if (!is_be_type) attributes |= swap_storage_mask; + break; + } + + buffer[index * 4 + 0] = (s32)type; + buffer[index * 4 + 1] = size; + buffer[index * 4 + 2] = (s32)offset_in_block[index]; + buffer[index * 4 + 3] = attributes; + } + } + + void thread::write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data) + { + char *transient = (char *)volatile_data; + char *persistent = (char *)persistent_data; + + auto &draw_call = rsx::method_registers.current_draw_clause; + + if (transient != nullptr) + { + if (draw_call.command == rsx::draw_command::inlined_array) + { + memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32)); + //Is it possible to reference data outside of the inlined array? + return; + } + + for (u8 index : layout.referenced_registers) + { + memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16); + transient += 16; + } + + if (draw_call.is_immediate_draw) + { + for (auto &info : layout.volatile_blocks) + { + memcpy(transient, vertex_push_buffers[info.first].data.data(), info.second); + transient += info.second; + } + + return; } } - } - void thread::wait_for_vertex_upload_task() - { - while (m_vertex_streaming_task.remaining_tasks.load(std::memory_order_consume) != 0 && !Emu.IsStopped()) + if (persistent != nullptr) { - _mm_pause(); + for (auto &block : layout.interleaved_blocks) + { + u32 unique_verts; + u32 vertex_base = first_vertex * block.attribute_stride; + + if (block.min_divisor > 1) + { + if (block.all_modulus) + unique_verts = block.min_divisor; + else + { + unique_verts = vertex_count / block.min_divisor; + if (vertex_count % block.min_divisor) unique_verts++; + } + } + else + unique_verts = vertex_count; + + const u32 data_size = block.attribute_stride * unique_verts; + memcpy(persistent, (char*)vm::base(block.real_offset_address) + vertex_base, data_size); + persistent += data_size; + } } } - bool thread::vertex_upload_task_ready() - { - if (g_cfg.video.vertex_upload_threads < 2) - return false; - - //Not initialized - if (m_vertex_streaming_task.available_threads == 0) - return true; - - //At least two threads are available - return (m_vertex_streaming_task.remaining_tasks < (m_vertex_streaming_task.available_threads - 1)); - } - void thread::flip(int buffer) { if (g_cfg.video.frame_skip_enabled) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 05bbb54db9..9489e0deea 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -105,6 +105,35 @@ namespace rsx std::vector inline_vertex_array; }; + struct interleaved_range_info + { + bool interleaved = false; + bool all_modulus = false; + u32 base_offset = 0; + u32 real_offset_address = 0; + u8 memory_location = 0; + u8 attribute_stride = 0; + u16 min_divisor = 0; + + std::vector locations; + }; + + enum attribute_buffer_placement : u8 + { + none = 0, + persistent = 1, + transient = 2 + }; + + struct vertex_input_layout + { + std::vector interleaved_blocks; //Interleaved blocks to be uploaded as-is + std::vector> volatile_blocks; //Volatile data blocks (immediate draw vertex data for example) + std::vector referenced_registers; //Volatile register data + + std::array attribute_placement; + }; + class thread : public named_thread { std::shared_ptr m_vblank_thread; @@ -152,8 +181,6 @@ namespace rsx bool m_rtts_dirty; bool m_transform_constants_dirty; bool m_textures_dirty[16]; - bool m_vertex_attribs_changed; - bool m_index_buffer_changed; protected: s32 m_skip_frame_ctr = 0; @@ -161,14 +188,23 @@ namespace rsx protected: std::array get_color_surface_addresses() const; u32 get_zeta_surface_address() const; - RSXVertexProgram get_current_vertex_program() const; + + /** + * Analyze vertex inputs and group all interleaved blocks + */ + vertex_input_layout analyse_inputs_interleaved() const; + + RSXVertexProgram current_vertex_program = {}; + RSXFragmentProgram current_fragment_program = {}; + + void get_current_vertex_program(); /** * Gets current fragment program and associated fragment state * get_surface_info is a helper takes 2 parameters: rsx_texture_address and surface_is_depth * returns whether surface is a render target and surface pitch in native format */ - RSXFragmentProgram get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) const; + void get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info); public: double fps_limit = 59.94; @@ -232,7 +268,7 @@ namespace rsx std::variant get_draw_command(const rsx::rsx_state& state) const; - /* + /** * Immediate mode rendering requires a temp push buffer to hold attrib values * Appends a value to the push buffer (currently only supports 32-wide types) */ @@ -243,47 +279,24 @@ namespace rsx u32 get_push_buffer_index_count() const; protected: - //Save draw call parameters to detect instanced renders - std::pair m_last_first_count; - rsx::draw_command m_last_command; - bool is_probable_instanced_draw(); + /** + * Computes VRAM requirements needed to upload raw vertex streams + * result.first contains persistent memory requirements + * result.second contains volatile memory requirements + */ + std::pair calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count); - public: - //MT vertex streaming - struct upload_stream_packet - { - std::function post_upload_func; - gsl::span src_span; - gsl::span dst_span; - rsx::vertex_base_type type; - u32 vector_width; - u32 vertex_count; - u32 src_stride; - u8 dst_stride; - }; + /** + * Generates vertex input descriptors as an array of 16x4 s32s + */ + void fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer); - struct upload_stream_worker - { - std::shared_ptr worker_thread; - std::vector packets; - std::atomic thread_status = { 0 }; - }; - - struct upload_stream_task - { - std::array worker_threads; - int available_threads = 0; - std::atomic remaining_tasks = {0}; - }; - - upload_stream_task m_vertex_streaming_task; - void post_vertex_stream_to_upload(gsl::span src, gsl::span dst, rsx::vertex_base_type type, - u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, u32 vertex_count, - std::function callback); - void start_vertex_upload_task(); - void wait_for_vertex_upload_task(); - bool vertex_upload_task_ready(); + /** + * Uploads vertex data described in the layout descriptor + * Copies from local memory to the write-only output buffers provided in a sequential manner + */ + void write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data); private: std::mutex m_mtx_task; diff --git a/rpcs3/Emu/RSX/RSXVertexProgram.h b/rpcs3/Emu/RSX/RSXVertexProgram.h index 13cbda6d41..ff35209c4b 100644 --- a/rpcs3/Emu/RSX/RSXVertexProgram.h +++ b/rpcs3/Emu/RSX/RSXVertexProgram.h @@ -228,4 +228,5 @@ struct RSXVertexProgram std::vector data; std::vector rsx_vertex_inputs; u32 output_mask; + bool skip_vertex_input_check; }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 32bcc914a8..6e69a30e24 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -736,6 +736,8 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) if (!flushable) return false; + close_render_pass(); + if (synchronized) { if (m_last_flushable_cb >= 0) @@ -817,7 +819,6 @@ void VKGSRender::begin() m_vertex_cache->purge(); CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0)); - m_last_descriptor_set = VK_NULL_HANDLE; m_used_descriptors = 0; m_uniform_buffer_ring_info.reset_allocation_stats(); @@ -858,20 +859,6 @@ void VKGSRender::begin() m_setup_time += std::chrono::duration_cast(stop - start).count(); } -void VKGSRender::emit_geometry_instance(u32/* instance_count*/) -{ - begin_render_pass(); - - m_instanced_draws++; - //Repeat last command - if (!m_last_draw_indexed) - vkCmdDraw(*m_current_command_buffer, m_last_vertex_count, 1, 0, 0); - else - { - vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, m_last_ib_offset, m_last_ib_type); - vkCmdDrawIndexed(*m_current_command_buffer, m_last_vertex_count, 1, 0, 0, 0); - } -} void VKGSRender::begin_render_pass() { @@ -916,39 +903,8 @@ void VKGSRender::end() std::chrono::time_point program_start = steady_clock::now(); - const bool is_instanced = is_probable_instanced_draw() && m_last_descriptor_set != VK_NULL_HANDLE && m_program != nullptr; - - if (is_instanced) - { - //Copy descriptor set - VkCopyDescriptorSet copy_info[39]; - u8 descriptors_count = 0; - - for (u8 i = 0; i < 39; ++i) - { - if ((m_program->attribute_location_mask & (1ull << i)) == 0) - continue; - - const u8 n = descriptors_count; - - copy_info[n] = {}; - copy_info[n].sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; - copy_info[n].srcSet = m_last_descriptor_set; - copy_info[n].dstSet = descriptor_sets; - copy_info[n].srcBinding = i; - copy_info[n].dstBinding = i; - copy_info[n].srcArrayElement = 0; - copy_info[n].dstArrayElement = 0; - copy_info[n].descriptorCount = 1; - - descriptors_count++; - } - - vkUpdateDescriptorSets(*m_device, 0, nullptr, descriptors_count, (const VkCopyDescriptorSet*)©_info); - } - //Load program here since it is dependent on vertex state - if (!load_program(is_instanced)) + if (!load_program()) { LOG_ERROR(RSX, "No valid program bound to pipeline. Skipping draw"); rsx::thread::end(); @@ -958,18 +914,6 @@ void VKGSRender::end() std::chrono::time_point program_stop = steady_clock::now(); //m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); - if (is_instanced) - { - //Only the program constants descriptors should have changed - vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); - vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr); - - emit_geometry_instance(1); - m_last_instanced_cb_index = m_current_cb_index; - rsx::thread::end(); - return; - } - close_render_pass(); //Texture upload stuff conflicts active RPs if (g_cfg.video.strict_rendering_mode) @@ -1173,9 +1117,6 @@ void VKGSRender::end() { const auto vertex_count = std::get<1>(upload_info); vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0); - - m_last_vertex_count = vertex_count; - m_last_draw_indexed = false; } else { @@ -1187,16 +1128,8 @@ void VKGSRender::end() vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); - - m_last_draw_indexed = false; - m_last_ib_type = index_type; - m_last_ib_offset = offset; - m_last_vertex_count = index_count; } - m_last_instanced_cb_index = ~0; - m_last_descriptor_set = descriptor_sets; - vk::leave_uninterruptible(); std::chrono::time_point draw_end = steady_clock::now(); @@ -1207,15 +1140,17 @@ void VKGSRender::end() if (g_cfg.video.overlay) { - if (m_last_vertex_count < 1024) + const auto vertex_count = std::get<1>(upload_info); + + if (vertex_count < 1024) m_uploads_small++; - else if (m_last_vertex_count < 2048) + else if (vertex_count < 2048) m_uploads_1k++; - else if (m_last_vertex_count < 4096) + else if (vertex_count < 4096) m_uploads_2k++; - else if (m_last_vertex_count < 8192) + else if (vertex_count < 8192) m_uploads_4k++; - else if (m_last_vertex_count < 16384) + else if (vertex_count < 16384) m_uploads_8k++; else m_uploads_16k++; @@ -1433,12 +1368,6 @@ void VKGSRender::copy_render_targets_to_dma_location() void VKGSRender::flush_command_queue(bool hard_sync) { - if (m_attrib_ring_info.mapped) - { - wait_for_vertex_upload_task(); - m_attrib_ring_info.unmap(); - } - close_render_pass(); close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence); @@ -1580,187 +1509,182 @@ bool VKGSRender::do_method(u32 cmd, u32 arg) } } -bool VKGSRender::load_program(bool fast_update) +bool VKGSRender::load_program(bool) { - RSXVertexProgram vertex_program; - RSXFragmentProgram fragment_program; + auto &vertex_program = current_vertex_program; + auto &fragment_program = current_fragment_program; - if (!fast_update) + auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple { - auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple - { - vk::render_target *surface = nullptr; - if (!is_depth) - surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); - else - surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - - if (!surface) return std::make_tuple(false, 0); - - return std::make_tuple(true, surface->native_pitch); - }; - - fragment_program = get_current_fragment_program(rtt_lookup_func); - if (!fragment_program.valid) return false; - - vertex_program = get_current_vertex_program(); - - vk::pipeline_props properties = {}; - - properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - bool unused; - properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, unused); - - if (rsx::method_registers.restart_index_enabled()) - { - properties.ia.primitiveRestartEnable = VK_TRUE; - } + vk::render_target *surface = nullptr; + if (!is_depth) + surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); else - properties.ia.primitiveRestartEnable = VK_FALSE; + surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); + + if (!surface) return std::make_tuple(false, 0); + + return std::make_tuple(true, surface->native_pitch); + }; + + get_current_fragment_program(rtt_lookup_func); + if (!fragment_program.valid) return false; + + get_current_vertex_program(); + + vk::pipeline_props properties = {}; + + properties.ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + bool unused; + properties.ia.topology = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, unused); + + if (rsx::method_registers.restart_index_enabled()) + { + properties.ia.primitiveRestartEnable = VK_TRUE; + } + else + properties.ia.primitiveRestartEnable = VK_FALSE; - for (int i = 0; i < 4; ++i) - { - properties.att_state[i].colorWriteMask = 0xf; - properties.att_state[i].blendEnable = VK_FALSE; - } + for (int i = 0; i < 4; ++i) + { + properties.att_state[i].colorWriteMask = 0xf; + properties.att_state[i].blendEnable = VK_FALSE; + } - VkColorComponentFlags mask = 0; - if (rsx::method_registers.color_mask_a()) mask |= VK_COLOR_COMPONENT_A_BIT; - if (rsx::method_registers.color_mask_b()) mask |= VK_COLOR_COMPONENT_B_BIT; - if (rsx::method_registers.color_mask_g()) mask |= VK_COLOR_COMPONENT_G_BIT; - if (rsx::method_registers.color_mask_r()) mask |= VK_COLOR_COMPONENT_R_BIT; + VkColorComponentFlags mask = 0; + if (rsx::method_registers.color_mask_a()) mask |= VK_COLOR_COMPONENT_A_BIT; + if (rsx::method_registers.color_mask_b()) mask |= VK_COLOR_COMPONENT_B_BIT; + if (rsx::method_registers.color_mask_g()) mask |= VK_COLOR_COMPONENT_G_BIT; + if (rsx::method_registers.color_mask_r()) mask |= VK_COLOR_COMPONENT_R_BIT; - VkColorComponentFlags color_masks[4] = { mask }; + VkColorComponentFlags color_masks[4] = { mask }; - u8 render_targets[] = { 0, 1, 2, 3 }; + u8 render_targets[] = { 0, 1, 2, 3 }; + + for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) + { + properties.att_state[render_targets[idx]].colorWriteMask = mask; + } + + if (rsx::method_registers.blend_enabled()) + { + VkBlendFactor sfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_rgb()); + VkBlendFactor sfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_a()); + VkBlendFactor dfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_rgb()); + VkBlendFactor dfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_a()); + + VkBlendOp equation_rgb = vk::get_blend_op(rsx::method_registers.blend_equation_rgb()); + VkBlendOp equation_a = vk::get_blend_op(rsx::method_registers.blend_equation_a()); for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) { - properties.att_state[render_targets[idx]].colorWriteMask = mask; + properties.att_state[render_targets[idx]].blendEnable = VK_TRUE; + properties.att_state[render_targets[idx]].srcColorBlendFactor = sfactor_rgb; + properties.att_state[render_targets[idx]].dstColorBlendFactor = dfactor_rgb; + properties.att_state[render_targets[idx]].srcAlphaBlendFactor = sfactor_a; + properties.att_state[render_targets[idx]].dstAlphaBlendFactor = dfactor_a; + properties.att_state[render_targets[idx]].colorBlendOp = equation_rgb; + properties.att_state[render_targets[idx]].alphaBlendOp = equation_a; } - if (rsx::method_registers.blend_enabled()) - { - VkBlendFactor sfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_rgb()); - VkBlendFactor sfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_sfactor_a()); - VkBlendFactor dfactor_rgb = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_rgb()); - VkBlendFactor dfactor_a = vk::get_blend_factor(rsx::method_registers.blend_func_dfactor_a()); - - VkBlendOp equation_rgb = vk::get_blend_op(rsx::method_registers.blend_equation_rgb()); - VkBlendOp equation_a = vk::get_blend_op(rsx::method_registers.blend_equation_a()); - - for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) - { - properties.att_state[render_targets[idx]].blendEnable = VK_TRUE; - properties.att_state[render_targets[idx]].srcColorBlendFactor = sfactor_rgb; - properties.att_state[render_targets[idx]].dstColorBlendFactor = dfactor_rgb; - properties.att_state[render_targets[idx]].srcAlphaBlendFactor = sfactor_a; - properties.att_state[render_targets[idx]].dstAlphaBlendFactor = dfactor_a; - properties.att_state[render_targets[idx]].colorBlendOp = equation_rgb; - properties.att_state[render_targets[idx]].alphaBlendOp = equation_a; - } - - auto blend_colors = rsx::get_constant_blend_colors(); - properties.cs.blendConstants[0] = blend_colors[0]; - properties.cs.blendConstants[1] = blend_colors[1]; - properties.cs.blendConstants[2] = blend_colors[2]; - properties.cs.blendConstants[3] = blend_colors[3]; - } - else - { - for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) - { - properties.att_state[render_targets[idx]].blendEnable = VK_FALSE; - } - } - - properties.cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - properties.cs.attachmentCount = m_draw_buffers_count; - properties.cs.pAttachments = properties.att_state; - - if (rsx::method_registers.logic_op_enabled()) - { - properties.cs.logicOpEnable = true; - properties.cs.logicOp = vk::get_logic_op(rsx::method_registers.logic_operation()); - } - - properties.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - properties.ds.depthWriteEnable = rsx::method_registers.depth_write_enabled() ? VK_TRUE : VK_FALSE; - - if (rsx::method_registers.depth_bounds_test_enabled()) - { - properties.ds.depthBoundsTestEnable = VK_TRUE; - properties.ds.minDepthBounds = rsx::method_registers.depth_bounds_min(); - properties.ds.maxDepthBounds = rsx::method_registers.depth_bounds_max(); - } - else - properties.ds.depthBoundsTestEnable = VK_FALSE; - - if (rsx::method_registers.stencil_test_enabled()) - { - properties.ds.stencilTestEnable = VK_TRUE; - properties.ds.front.writeMask = rsx::method_registers.stencil_mask(); - properties.ds.front.compareMask = rsx::method_registers.stencil_func_mask(); - properties.ds.front.reference = rsx::method_registers.stencil_func_ref(); - properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail()); - properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()); - properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()); - properties.ds.front.compareOp = vk::get_compare_func(rsx::method_registers.stencil_func()); - - if (rsx::method_registers.two_sided_stencil_test_enabled()) - { - properties.ds.back.writeMask = rsx::method_registers.back_stencil_mask(); - properties.ds.back.compareMask = rsx::method_registers.back_stencil_func_mask(); - properties.ds.back.reference = rsx::method_registers.back_stencil_func_ref(); - properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail()); - properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass()); - properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail()); - properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func()); - } - else - properties.ds.back = properties.ds.front; - } - else - properties.ds.stencilTestEnable = VK_FALSE; - - if (rsx::method_registers.depth_test_enabled()) - { - properties.ds.depthTestEnable = VK_TRUE; - properties.ds.depthCompareOp = vk::get_compare_func(rsx::method_registers.depth_func()); - } - else - properties.ds.depthTestEnable = VK_FALSE; - - properties.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - properties.rs.polygonMode = VK_POLYGON_MODE_FILL; - properties.rs.depthClampEnable = VK_FALSE; - properties.rs.rasterizerDiscardEnable = VK_FALSE; - properties.rs.depthBiasEnable = VK_FALSE; - - if (rsx::method_registers.cull_face_enabled()) - properties.rs.cullMode = vk::get_cull_face(rsx::method_registers.cull_face_mode()); - else - properties.rs.cullMode = VK_CULL_MODE_NONE; - - properties.rs.frontFace = vk::get_front_face(rsx::method_registers.front_face_mode()); - - size_t idx = vk::get_render_pass_location( - vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, - vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), - (u8)m_draw_buffers_count); - - properties.render_pass = m_render_passes[idx]; - - properties.num_targets = m_draw_buffers_count; - - vk::enter_uninterruptible(); - - //Load current program from buffer - m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get(); + auto blend_colors = rsx::get_constant_blend_colors(); + properties.cs.blendConstants[0] = blend_colors[0]; + properties.cs.blendConstants[1] = blend_colors[1]; + properties.cs.blendConstants[2] = blend_colors[2]; + properties.cs.blendConstants[3] = blend_colors[3]; } else - vk::enter_uninterruptible(); + { + for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) + { + properties.att_state[render_targets[idx]].blendEnable = VK_FALSE; + } + } + + properties.cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + properties.cs.attachmentCount = m_draw_buffers_count; + properties.cs.pAttachments = properties.att_state; + + if (rsx::method_registers.logic_op_enabled()) + { + properties.cs.logicOpEnable = true; + properties.cs.logicOp = vk::get_logic_op(rsx::method_registers.logic_operation()); + } + + properties.ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + properties.ds.depthWriteEnable = rsx::method_registers.depth_write_enabled() ? VK_TRUE : VK_FALSE; + + if (rsx::method_registers.depth_bounds_test_enabled()) + { + properties.ds.depthBoundsTestEnable = VK_TRUE; + properties.ds.minDepthBounds = rsx::method_registers.depth_bounds_min(); + properties.ds.maxDepthBounds = rsx::method_registers.depth_bounds_max(); + } + else + properties.ds.depthBoundsTestEnable = VK_FALSE; + + if (rsx::method_registers.stencil_test_enabled()) + { + properties.ds.stencilTestEnable = VK_TRUE; + properties.ds.front.writeMask = rsx::method_registers.stencil_mask(); + properties.ds.front.compareMask = rsx::method_registers.stencil_func_mask(); + properties.ds.front.reference = rsx::method_registers.stencil_func_ref(); + properties.ds.front.failOp = vk::get_stencil_op(rsx::method_registers.stencil_op_fail()); + properties.ds.front.passOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zpass()); + properties.ds.front.depthFailOp = vk::get_stencil_op(rsx::method_registers.stencil_op_zfail()); + properties.ds.front.compareOp = vk::get_compare_func(rsx::method_registers.stencil_func()); + + if (rsx::method_registers.two_sided_stencil_test_enabled()) + { + properties.ds.back.writeMask = rsx::method_registers.back_stencil_mask(); + properties.ds.back.compareMask = rsx::method_registers.back_stencil_func_mask(); + properties.ds.back.reference = rsx::method_registers.back_stencil_func_ref(); + properties.ds.back.failOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_fail()); + properties.ds.back.passOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zpass()); + properties.ds.back.depthFailOp = vk::get_stencil_op(rsx::method_registers.back_stencil_op_zfail()); + properties.ds.back.compareOp = vk::get_compare_func(rsx::method_registers.back_stencil_func()); + } + else + properties.ds.back = properties.ds.front; + } + else + properties.ds.stencilTestEnable = VK_FALSE; + + if (rsx::method_registers.depth_test_enabled()) + { + properties.ds.depthTestEnable = VK_TRUE; + properties.ds.depthCompareOp = vk::get_compare_func(rsx::method_registers.depth_func()); + } + else + properties.ds.depthTestEnable = VK_FALSE; + + properties.rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + properties.rs.polygonMode = VK_POLYGON_MODE_FILL; + properties.rs.depthClampEnable = VK_FALSE; + properties.rs.rasterizerDiscardEnable = VK_FALSE; + properties.rs.depthBiasEnable = VK_FALSE; + + if (rsx::method_registers.cull_face_enabled()) + properties.rs.cullMode = vk::get_cull_face(rsx::method_registers.cull_face_mode()); + else + properties.rs.cullMode = VK_CULL_MODE_NONE; + + properties.rs.frontFace = vk::get_front_face(rsx::method_registers.front_face_mode()); + + size_t idx = vk::get_render_pass_location( + vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, + vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), + (u8)m_draw_buffers_count); + + properties.render_pass = m_render_passes[idx]; + + properties.num_targets = m_draw_buffers_count; + + vk::enter_uninterruptible(); + + //Load current program from buffer + m_program = m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get(); //TODO: Update constant buffers.. //1. Update scale-offset matrix @@ -1781,7 +1705,7 @@ bool VKGSRender::load_program(bool fast_update) m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, scale_offset_offset, 256 }, SCALE_OFFSET_BIND_SLOT, descriptor_sets); - if (!fast_update || m_transform_constants_dirty) + if (true)//m_transform_constants_dirty) { const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float)); buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float)); @@ -1793,21 +1717,18 @@ bool VKGSRender::load_program(bool fast_update) m_transform_constants_dirty = false; } - if (!fast_update) - { - const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); - const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float)); - const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz); + const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); + const size_t fragment_buffer_sz = fragment_constants_sz + (17 * 4 * sizeof(float)); + const size_t fragment_constants_offset = m_uniform_buffer_ring_info.alloc<256>(fragment_buffer_sz); - buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz); - if (fragment_constants_sz) - m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, fragment_program); + buf = (u8*)m_uniform_buffer_ring_info.map(fragment_constants_offset, fragment_buffer_sz); + if (fragment_constants_sz) + m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, fragment_program); - fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); - m_uniform_buffer_ring_info.unmap(); + fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); + m_uniform_buffer_ring_info.unmap(); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); - } + m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, descriptor_sets); vk::leave_uninterruptible(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index f43e8e1dc5..0c1024c467 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -200,14 +200,6 @@ private: std::thread::id rsx_thread; - VkPrimitiveTopology m_last_primititve_type; - VkIndexType m_last_ib_type; - VkDescriptorSet m_last_descriptor_set; - size_t m_last_ib_offset; - u32 m_last_vertex_count; - bool m_last_draw_indexed; - u32 m_last_instanced_cb_index; - bool render_pass_open = false; #ifdef __linux__ @@ -233,8 +225,6 @@ private: void begin_render_pass(); void close_render_pass(); - void emit_geometry_instance(u32 instance_count); - /// returns primitive topology, is_indexed, index_count, offset in index buffer, index type std::tuple > > upload_vertex_data(); public: diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 035611cc98..dbad6d15db 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -265,10 +265,18 @@ namespace return; // Fill vertex_array - u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); - u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size); + const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); + const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size); + const u32 upload_size = real_element_size * vertex_count; + const VkFormat format = vk::get_suitable_vk_format(vertex_array.type, vertex_array.attribute_size); + const uintptr_t local_addr = (uintptr_t)vertex_array.data.data(); - u32 upload_size = real_element_size * vertex_count; + if (auto found = vertex_cache->find_vertex_range(local_addr, format, upload_size)) + { + m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, format, found->offset_in_heap, upload_size)); + m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets); + return; + } VkDeviceSize offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size); void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size); @@ -281,9 +289,7 @@ namespace vk::prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count); m_attrib_ring_info.unmap(); - const VkFormat format = vk::get_suitable_vk_format(vertex_array.type, vertex_array.attribute_size); - const uintptr_t local_addr = (uintptr_t)vertex_array.data.data(); vertex_cache->store_range(local_addr, format, upload_size, (u32)offset_in_attrib_buffer); m_buffer_view_to_clean.push_back(std::make_unique(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size)); @@ -474,137 +480,9 @@ namespace const auto& vertex_buffers = get_vertex_buffers( rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}}); - - //1. Check if we can get all these allocations at once - std::vector memory_allocations(16); - std::vector allocated_sizes(16); - std::vector upload_jobs(16); - memory_allocations.resize(0); - allocated_sizes.resize(0); - upload_jobs.resize(0); - - for (int i = 0; i < vertex_buffers.size(); ++i) - { - const auto &vbo = vertex_buffers[i]; - bool can_multithread = false; - - if (vbo.which() == 0) - { - //vertex array buffer. We can thread this thing heavily - const auto& v = vbo.get(); - - const u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size); - const u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size); - const u32 upload_size = real_element_size * vertex_count; - const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size); - const uintptr_t local_addr = (uintptr_t)v.data.data(); - - const auto cached = rsxthr->m_vertex_cache->find_vertex_range(local_addr, format, upload_size); - if (cached) - { - m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, format, cached->offset_in_heap, upload_size)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets); - - continue; - } - - if (v.attribute_size > 1 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready()) - { - can_multithread = true; - - size_t offset = m_attrib_ring_info.alloc<256>(upload_size); - - memory_allocations.push_back(offset); - allocated_sizes.push_back(upload_size); - upload_jobs.push_back(i); - - const uintptr_t local_addr = (uintptr_t)v.data.data(); - rsxthr->m_vertex_cache->store_range(local_addr, format, upload_size, (u32)offset); - - m_buffer_view_to_clean.push_back(std::make_unique(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size)); - m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets); - } - } - - if (!can_multithread) - std::apply_visitor(visitor, vbo); - } - - if (memory_allocations.size() > 0) - { - if (memory_allocations.size() > 1) - { - //2 sets in case the blocks dont fit - u8 available_jobs[2] = {}; - u32 allocated_block[2] = {}; - - size_t last_offset = memory_allocations[0]; - u8 current_index = 0; - - for (int n = 0; n < memory_allocations.size(); ++n) - { - if (memory_allocations[n] < last_offset) - { - //queue went around - current_index = 1; - } - - available_jobs[current_index] ++; - allocated_block[current_index] += allocated_sizes[n]; - } - - int n = 0; - for (int task = 0; task < 2; ++task) - { - if (available_jobs[task]) - { - if (m_attrib_ring_info.mapped) - { - rsxthr->wait_for_vertex_upload_task(); - m_attrib_ring_info.unmap(); - } - - size_t space_remaining = allocated_block[task]; - size_t offset_base = memory_allocations[n]; - - gsl::byte* dst = (gsl::byte*)m_attrib_ring_info.map(memory_allocations[n], space_remaining); - - while (true) - { - if (space_remaining == 0) - break; - - const auto& vertex_array = vertex_buffers[upload_jobs[n]].get(); - const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size); - - gsl::span dest_span(dst + (memory_allocations[n] - offset_base), allocated_sizes[n]); - rsxthr->post_vertex_stream_to_upload(vertex_array.data, dest_span, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, real_element_size, vertex_count, vk::prepare_buffer_for_writing); - - space_remaining -= allocated_sizes[n]; - n++; - } - - rsxthr->start_vertex_upload_task(); - } - } - } - else - { - const size_t offset_in_attrib_buffer = memory_allocations[0]; - const u32 upload_size = allocated_sizes[0]; - const auto& vertex_array = vertex_buffers[upload_jobs[0]].get(); - const u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size); - - void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size); - gsl::span dest_span(static_cast(dst), upload_size); - - write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, real_element_size); - vk::prepare_buffer_for_writing(dst, vertex_array.type, vertex_array.attribute_size, vertex_count); - - m_attrib_ring_info.unmap(); - } - } + for (auto &vbo: vertex_buffers) + std::apply_visitor(visitor, vbo); } u32 upload_inlined_array() diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index 93199f1e06..059ea786ff 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -9,7 +9,7 @@ namespace rsx f, ///< float sf, ///< half float ub, ///< unsigned byte interpreted as 0.f and 1.f - s32k, ///< signed 32bits int + s32k, ///< signed 16bits int cmp, ///< compressed aka X11G11Z10 and always 1. W. ub256, ///< unsigned byte interpreted as between 0 and 255. }; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index e49fc57640..30f6ff7289 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -459,20 +459,6 @@ namespace rsx rsx->m_textures_dirty[index] = true; } }; - - template - struct set_vertex_array_dirty_bit - { - static void impl(thread* rsx, u32, u32) - { - rsx->m_vertex_attribs_changed = true; - } - }; - - void set_idbuf_dirty_bit(thread* rsx, u32, u32) - { - rsx->m_index_buffer_changed = true; - } } namespace nv308a @@ -1544,8 +1530,6 @@ namespace rsx bind_range(); bind_range(); bind_range(); - bind_range(); - bind(); bind(); bind(); bind(); diff --git a/rpcs3/Emu/RSX/rsx_vertex_data.h b/rpcs3/Emu/RSX/rsx_vertex_data.h index 4ba9bc4849..82d83663e6 100644 --- a/rpcs3/Emu/RSX/rsx_vertex_data.h +++ b/rpcs3/Emu/RSX/rsx_vertex_data.h @@ -60,8 +60,8 @@ public: struct push_buffer_vertex_info { - u8 size; - vertex_base_type type; + u8 size = 0; + vertex_base_type type = vertex_base_type::f; u32 vertex_count = 0; u32 attribute_mask = ~0; @@ -72,6 +72,7 @@ struct push_buffer_vertex_info data.resize(0); attribute_mask = ~0; vertex_count = 0; + size = 0; } u8 get_vertex_size_in_dwords(vertex_base_type type)