diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index fae572c5b8..51b6f84c24 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -276,45 +276,6 @@ void GLGSRender::begin() //NV4097_SET_FLAT_SHADE_OP //NV4097_SET_EDGE_FLAG - auto set_clip_plane_control = [&](int index, rsx::user_clip_plane_op control) - { - int value = 0; - int location; - - if (m_program->uniforms.has_location("uc_m" + std::to_string(index), &location)) - { - switch (control) - { - default: - LOG_ERROR(RSX, "bad clip plane control (0x%x)", (u8)control); - - case rsx::user_clip_plane_op::disable: - value = 0; - break; - - case rsx::user_clip_plane_op::greater_or_equal: - value = 1; - break; - - case rsx::user_clip_plane_op::less_than: - value = -1; - break; - } - - __glcheck m_program->uniforms[location] = value; - } - - __glcheck enable(value, GL_CLIP_DISTANCE0 + index); - }; - - load_program(); - set_clip_plane_control(0, rsx::method_registers.clip_plane_0_enabled()); - set_clip_plane_control(1, rsx::method_registers.clip_plane_1_enabled()); - set_clip_plane_control(2, rsx::method_registers.clip_plane_2_enabled()); - set_clip_plane_control(3, rsx::method_registers.clip_plane_3_enabled()); - set_clip_plane_control(4, rsx::method_registers.clip_plane_4_enabled()); - set_clip_plane_control(5, rsx::method_registers.clip_plane_5_enabled()); - if (__glcheck enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE)) { __glcheck glCullFace(cull_face(rsx::method_registers.cull_face_mode())); @@ -369,6 +330,56 @@ void GLGSRender::end() return; } + std::chrono::time_point program_start = steady_clock::now(); + + //Load program here since it is dependent on vertex state + load_program(); + + std::chrono::time_point program_stop = steady_clock::now(); + m_begin_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); + + //Set active user clip planes + const rsx::user_clip_plane_op clip_plane_control[6] = + { + rsx::method_registers.clip_plane_0_enabled(), + rsx::method_registers.clip_plane_1_enabled(), + rsx::method_registers.clip_plane_2_enabled(), + rsx::method_registers.clip_plane_3_enabled(), + rsx::method_registers.clip_plane_4_enabled(), + rsx::method_registers.clip_plane_5_enabled(), + }; + + for (int index = 0; index < 6; ++index) + { + int value = 0; + int location; + + if (m_program->uniforms.has_location("uc_m" + std::to_string(index), &location)) + { + switch (clip_plane_control[index]) + { + default: + LOG_ERROR(RSX, "bad clip plane control (0x%x)", (u8)clip_plane_control[index]); + + case rsx::user_clip_plane_op::disable: + value = 0; + break; + + case rsx::user_clip_plane_op::greater_or_equal: + value = 1; + break; + + case rsx::user_clip_plane_op::less_than: + value = -1; + break; + } + + __glcheck m_program->uniforms[location] = value; + } + + __glcheck enable(value, GL_CLIP_DISTANCE0 + index); + }; + if (manually_flush_ring_buffers) { //Use approximations to reseve space. This path is mostly for debug purposes anyway diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index da9aa9d799..891d528a05 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -329,16 +329,38 @@ namespace rsx void thread::begin() { rsx::method_registers.current_draw_clause.inline_vertex_array.clear(); + in_begin_end = true; + } + + void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, u32 value) + { + vertex_push_buffers[attribute].size = size; + vertex_push_buffers[attribute].append_vertex_data(subreg_index, value); + } + + u32 thread::get_push_buffer_vertex_count() + { + //There's no restriction on which attrib shall hold vertex data, so we check them all + u32 max_vertex_count = 0; + for (auto &buf: vertex_push_buffers) + { + max_vertex_count = std::max(max_vertex_count, buf.vertex_count); + } + + return max_vertex_count; } void thread::end() { rsx::method_registers.transform_constants.clear(); + in_begin_end = false; for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { //Disabled, see https://github.com/RPCS3/rpcs3/issues/1932 //rsx::method_registers.register_vertex_info[index].size = 0; + + vertex_push_buffers[index].clear(); } if (capture_current_frame) @@ -670,7 +692,8 @@ namespace rsx return {ptr + first * vertex_array_info.stride(), count * vertex_array_info.stride() + element_size}; } - std::vector> thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const + std::vector> + thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const { std::vector> result; result.reserve(rsx::limits::vertex_count); @@ -690,6 +713,16 @@ namespace rsx continue; } + if (vertex_push_buffers[index].vertex_count > 1) + { + const rsx::register_vertex_data_info& info = state.register_vertex_info[index]; + const u8 element_size = info.size * sizeof(u32); + + gsl::span vertex_src = { (const gsl::byte*)vertex_push_buffers[index].data.data(), vertex_push_buffers[index].vertex_count * element_size }; + result.push_back(vertex_array_buffer{ info.type, info.size, element_size, vertex_src, index }); + continue; + } + if (state.register_vertex_info[index].size > 0) { const rsx::register_vertex_data_info& info = state.register_vertex_info[index]; @@ -827,7 +860,7 @@ namespace rsx RSXVertexProgram thread::get_current_vertex_program() const { RSXVertexProgram result = {}; - u32 transform_program_start = rsx::method_registers.transform_program_start(); + const u32 transform_program_start = rsx::method_registers.transform_program_start(); result.data.reserve((512 - transform_program_start) * 4); for (int i = transform_program_start; i < 512; ++i) @@ -843,8 +876,8 @@ namespace rsx } result.output_mask = rsx::method_registers.vertex_attrib_output_mask(); - u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); - u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); + const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); + const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); result.rsx_vertex_inputs.clear(); for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { @@ -862,6 +895,16 @@ namespace rsx true, is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0}); } + else if (vertex_push_buffers[index].vertex_count > 1) + { + result.rsx_vertex_inputs.push_back( + { index, + rsx::method_registers.register_vertex_info[index].size, + 1, + false, + true, + is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 }); + } else if (rsx::method_registers.register_vertex_info[index].size > 0) { result.rsx_vertex_inputs.push_back( diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 902f266cc6..8490580c17 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -168,6 +168,7 @@ namespace rsx protected: std::stack m_call_stack; + std::array vertex_push_buffers; public: old_shaders_cache::shaders_cache shaders_cache; @@ -233,6 +234,7 @@ namespace rsx public: std::set m_used_gcm_commands; bool invalid_command_interrupt_raised = false; + bool in_begin_end = false; protected: thread(); @@ -265,10 +267,19 @@ namespace rsx gsl::span get_raw_index_array(const std::vector >& draw_indexed_clause) const; gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector>& vertex_ranges) const; - std::vector> get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const; + std::vector> + get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const; + std::variant get_draw_command(const rsx::rsx_state& state) const; + /* + * Immediate mode rendering requires a temp push buffer to hold attrib values + * Appends a value to the push buffer (currently only supports 32-wide types) + */ + void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, u32 value); + u32 get_push_buffer_vertex_count(); + private: std::mutex m_mtx_task; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ba562bf2d4..7c568855f0 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -658,9 +658,6 @@ void VKGSRender::begin() init_buffers(); - if (!load_program()) - return; - float actual_line_width = rsx::method_registers.line_width(); vkCmdSetLineWidth(m_command_buffer, actual_line_width); @@ -682,6 +679,14 @@ void VKGSRender::end() (u8)vk::get_draw_buffers(rsx::method_registers.surface_color_target()).size()); VkRenderPass current_render_pass = m_render_passes[idx]; + std::chrono::time_point program_start = steady_clock::now(); + + //Load program here since it is dependent on vertex state + load_program(); + + std::chrono::time_point program_stop = steady_clock::now(); + m_setup_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); + std::chrono::time_point textures_start = steady_clock::now(); for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 5b51301aff..9db3807bac 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -116,6 +116,9 @@ namespace rsx static const size_t attribute_index = index / increment_per_array_index; static const size_t vertex_subreg = index % increment_per_array_index; + if (rsx->in_begin_end) + rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, arg); + auto& info = rsx::method_registers.register_vertex_info[attribute_index]; info.type = vertex_data_type_from_element_type::type; @@ -246,30 +249,12 @@ namespace rsx return; } - u32 max_vertex_count = 0; - - for (u8 index = 0; index < rsx::limits::vertex_count; ++index) - { - auto &vertex_info = rsx::method_registers.register_vertex_info[index]; - - if (vertex_info.size > 0) - { - u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); - u32 element_count = vertex_info.size; - - vertex_info.frequency = element_count; - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::none) - { - max_vertex_count = std::max(max_vertex_count, element_count); - } - } - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::none && max_vertex_count) + //Check if we have immediate mode vertex data in a driver-local buffer + const u32 push_buffer_vertices_count = rsxthr->get_push_buffer_vertex_count(); + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::none && push_buffer_vertices_count) { rsx::method_registers.current_draw_clause.command = rsx::draw_command::array; - rsx::method_registers.current_draw_clause.first_count_commands.push_back(std::make_pair(0, max_vertex_count)); + rsx::method_registers.current_draw_clause.first_count_commands.push_back(std::make_pair(0, push_buffer_vertices_count)); } if (!(rsx::method_registers.current_draw_clause.first_count_commands.empty() && diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 1141b14887..37b66f8e2a 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -153,12 +153,13 @@ namespace rsx /** * RSX can sources vertex attributes from 2 places: - * - Immediate values passed by NV4097_SET_VERTEX_DATA*_M + ARRAY_ID write. + * 1. Immediate values passed by NV4097_SET_VERTEX_DATA*_M + ARRAY_ID write. * For a given ARRAY_ID the last command of this type defines the actual type of the immediate value. - * Since there can be only a single value per ARRAY_ID passed this way, all vertex in the draw call + * If there is only a single value on an ARRAY_ID passed this way, all vertex in the draw call * shares it. - * - Vertex array values passed by offset/stride/size/format description. + * Immediate mode rendering uses this method as well to upload vertex data. * + * 2. Vertex array values passed by offset/stride/size/format description. * A given ARRAY_ID can have both an immediate value and a vertex array enabled at the same time * (See After Burner Climax intro cutscene). In such case the vertex array has precedence over the * immediate value. As soon as the vertex array is disabled (size set to 0) the immediate value diff --git a/rpcs3/Emu/RSX/rsx_vertex_data.h b/rpcs3/Emu/RSX/rsx_vertex_data.h index 8b868c9a7f..c1e703873e 100644 --- a/rpcs3/Emu/RSX/rsx_vertex_data.h +++ b/rpcs3/Emu/RSX/rsx_vertex_data.h @@ -2,6 +2,7 @@ #include "GCM.h" #include "Utilities/types.h" +#include "Utilities/BEType.h" namespace rsx { @@ -57,6 +58,39 @@ public: } }; +struct push_buffer_vertex_info +{ + u8 size; + vertex_base_type type; + + u32 vertex_count = 0; + u32 attribute_mask = ~0; + std::vector data; + + void clear() + { + data.resize(0); + attribute_mask = ~0; + vertex_count = 0; + } + + void append_vertex_data(u32 sub_index, u32 arg) + { + const u32 element_mask = (1 << sub_index); + if (attribute_mask & element_mask) + { + attribute_mask = 0; + + vertex_count++; + data.resize(vertex_count * size); + } + + attribute_mask |= element_mask; + u32* dst = data.data() + ((vertex_count - 1) * size) + sub_index; + *dst = se_storage::swap(arg); + } +}; + struct register_vertex_data_info { u16 frequency = 0;