diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index a954b9e67e..4db7d07112 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -401,6 +401,7 @@ target_sources(rpcs3_emu PRIVATE RSX/GSRender.cpp RSX/RSXFIFO.cpp RSX/rsx_methods.cpp + RSX/rsx_vertex_data.cpp RSX/RSXOffload.cpp RSX/RSXTexture.cpp RSX/RSXThread.cpp diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index fcbe364349..e0a4a2de34 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -473,26 +473,28 @@ namespace rsx void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) { - vertex_push_buffers[attribute].size = size; - vertex_push_buffers[attribute].append_vertex_data(subreg_index, type, value); + if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute))) + { + return; + } + + // Enforce ATTR0 as the provoking attribute for push buffers: its buffer supplies the vertex ID that every attribute write is stored under. + // This whole thing becomes a mess if we don't have a provoking attribute. + const auto vertex_id = vertex_push_buffers[0].get_vertex_id(); + vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value); } u32 thread::get_push_buffer_vertex_count() const { - //There's no restriction on which attrib shall hold vertex data, so we check them all - u32 max_vertex_count = 0; - for (auto &buf: vertex_push_buffers) - { - max_vertex_count = std::max(max_vertex_count, buf.vertex_count); - } - - return max_vertex_count; + // Enforce ATTR0 as the provoking attribute for push buffers: its vertex count is reported as the push-buffer vertex count. + // This whole thing becomes a mess if we don't have a provoking attribute.
+ return vertex_push_buffers[0].vertex_count; } void thread::append_array_element(u32 index) { - //Endianness is swapped because common upload code expects input in BE - //TODO: Implement fast upload path for LE inputs and do away with this + // Endianness is swapped because common upload code expects input in BE + // TODO: Implement fast upload path for LE inputs and do away with this element_push_buffer.push_back(std::bit_cast>(index)); } @@ -1732,7 +1734,7 @@ namespace rsx current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask); } - void thread::analyse_inputs_interleaved(vertex_input_layout& result) const + void thread::analyse_inputs_interleaved(vertex_input_layout& result) { const rsx_state& state = rsx::method_registers; const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask; @@ -1800,6 +1802,9 @@ namespace rsx // Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults if (vertex_push_buffers[index].vertex_count > 1) { + // Ensure a consistent number of vertices per attribute by padding this buffer up to ATTR0's vertex count.
+ vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false); + // Read temp buffer (register array) std::pair volatile_range_info = std::make_pair(index, static_cast(vertex_push_buffers[index].data.size() * sizeof(u32))); result.volatile_blocks.push_back(volatile_range_info); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index a90de45745..3cf769941c 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -745,7 +745,7 @@ namespace rsx /** * Analyze vertex inputs and group all interleaved blocks */ - void analyse_inputs_interleaved(vertex_input_layout&) const; + void analyse_inputs_interleaved(vertex_input_layout&); RSXVertexProgram current_vertex_program = {}; RSXFragmentProgram current_fragment_program = {}; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 3e11351302..a1415add44 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -281,10 +281,10 @@ namespace rsx if (rsx->in_begin_end) { // Update to immediate mode register/array + // NOTE: Push buffers still behave like register writes. + // You do not need to specify each attribute for each vertex; the register contents are referenced instead. + // This is classic OpenGL 1.x behavior, as far as I remember. rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg); - - // NOTE: one can update the register to update constant across primitive. Needs verification.
-			// Fall through
 		}
 
 		auto& info = rsx::method_registers.register_vertex_info[attribute_index];
diff --git a/rpcs3/Emu/RSX/rsx_vertex_data.cpp b/rpcs3/Emu/RSX/rsx_vertex_data.cpp
new file mode 100644
index 0000000000..07c8abec07
--- /dev/null
+++ b/rpcs3/Emu/RSX/rsx_vertex_data.cpp
@@ -0,0 +1,121 @@
+#include "stdafx.h"
+#include "rsx_vertex_data.h"
+#include "rsx_methods.h"
+
+namespace rsx
+{
+	// Drops all buffered vertex data and resets vertex_count, dword_count and size.
+	// A buffer whose size is already 0 is left untouched.
+	void push_buffer_vertex_info::clear()
+	{
+		if (size)
+		{
+			data.clear();
+			vertex_count = 0;
+			dword_count = 0;
+			size = 0;
+		}
+	}
+
+	// Returns the number of 32-bit words one vertex occupies for the current type and size.
+	// Raises via fmt::throw_exception for a base type that is not handled below.
+	u8 push_buffer_vertex_info::get_vertex_size_in_dwords() const
+	{
+		// NOTE: Types are always provided to fit into 32-bits
+		// i.e no less than 4 8-bit values and no less than 2 16-bit values
+
+		switch (type)
+		{
+		case vertex_base_type::f:
+			return size;
+		case vertex_base_type::ub:
+		case vertex_base_type::ub256:
+			return 1;
+		case vertex_base_type::s1:
+		case vertex_base_type::s32k:
+			return size / 2;
+		default:
+			fmt::throw_exception("Unsupported vertex base type %d", static_cast(type));
+		}
+	}
+
+	// Returns the index of the vertex currently being written: dword_count divided by the per-vertex dword size, or 0 while size is 0.
+	// Must only be called on the ATTR0 buffer (enforced by the ensure below).
+	u32 push_buffer_vertex_info::get_vertex_id() const
+	{
+		ensure(attr == 0); // Only ask ATTR0 for vertex ID
+
+		// Which is the current vertex ID to be written to?
+		// NOTE: Fully writing to ATTR0 closes the current block
+		return size ? (dword_count / get_vertex_size_in_dwords()) : 0;
+	}
+
+	// Stores one 32-bit word (arg) at sub_index of vertex vertex_id, padding the stream up to that vertex first if needed.
+	// The buffer adopts the given type, size and attribute_id; a change of type or size mid-draw is logged as an error.
+	void push_buffer_vertex_info::set_vertex_data(u32 attribute_id, u32 vertex_id, u32 sub_index, vertex_base_type type, u32 size, u32 arg)
+	{
+		const bool respecced = vertex_count && (type != this->type || size != this->size);
+
+		if (respecced)
+		{
+			// TODO: Should forcefully break the draw call on this step using an execution barrier.
+			// While RSX can handle this behavior without problem, it can only be the product of nonsensical game design.
+			rsx_log.error("Vertex attribute %u was respecced mid-draw (type = %d vs %d, size = %u vs %u). Indexed execution barrier required. Report this to developers.",
+				attribute_id, static_cast(type), static_cast(this->type), size, this->size);
+		}
+
+		this->type = type;
+		this->size = size;
+		this->attr = attribute_id;
+
+		const auto required_vertex_count = (vertex_id + 1);
+		const auto vertex_size = get_vertex_size_in_dwords();
+
+		if (respecced)
+		{
+			// The storage was sized for the previous layout. Re-size it for the new per-vertex size,
+			// otherwise the write below can land past the end of the buffer when no padding is triggered.
+			data.resize(vertex_size * vertex_count);
+		}
+
+		if (vertex_count != required_vertex_count)
+		{
+			pad_to(required_vertex_count, true);
+			ensure(vertex_count == required_vertex_count);
+		}
+
+		auto current_vertex = data.data() + ((vertex_count - 1) * vertex_size);
+		current_vertex[sub_index] = arg;
+		++dword_count;
+	}
+
+	// Grows the stream to required_vertex_count vertices; does nothing if it already holds at least that many.
+	// New slots are filled with the attribute's current register contents. When skip_last is set,
+	// the final slot is not filled here and is left for the caller to write.
+	void push_buffer_vertex_info::pad_to(u32 required_vertex_count, bool skip_last)
+	{
+		if (vertex_count >= required_vertex_count)
+		{
+			return;
+		}
+
+		const auto vertex_size = get_vertex_size_in_dwords();
+		data.resize(vertex_size * required_vertex_count);
+
+		// For all previous verts, copy over the register contents duplicated over the stream.
+		// Internally it appears RSX actually executes the draw commands as they are encountered.
+		// You can change register data contents mid-way for example and it will pick up for the next N draws.
+		// This is how immediate mode is implemented internally.
+		u32* src = rsx::method_registers.register_vertex_info[attr].data.data();
+		u32* dst = data.data() + (vertex_count * vertex_size);
+		u32* end = data.data() + ((required_vertex_count - (skip_last ? 1 : 0)) * vertex_size);
+
+		while (dst < end)
+		{
+			std::memcpy(dst, src, vertex_size * sizeof(u32));
+			dst += vertex_size;
+		}
+
+		vertex_count = required_vertex_count;
+	}
+}
diff --git a/rpcs3/Emu/RSX/rsx_vertex_data.h b/rpcs3/Emu/RSX/rsx_vertex_data.h
index 2f38c995b4..b7261105cb 100644
--- a/rpcs3/Emu/RSX/rsx_vertex_data.h
+++ b/rpcs3/Emu/RSX/rsx_vertex_data.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "gcm_enums.h"
-
+#include "rsx_decode.h"
 #include "Common/simple_array.hpp"
 #include "util/types.hpp"
 
@@ -56,64 +56,23 @@ public:
 
 	struct push_buffer_vertex_info
 	{
-		u8 size = 0;
+		u32 attr = 0;
+		u32 size = 0;
 		vertex_base_type type = vertex_base_type::f;
 
 		u32 vertex_count = 0;
-		u32 attribute_mask = ~0;
+		u32 dword_count = 0;
 		rsx::simple_array data;
 
-		void clear()
-		{
-			if (size)
-			{
-				data.clear();
-				attribute_mask = ~0;
-				vertex_count = 0;
-				size = 0;
-			}
-		}
+		push_buffer_vertex_info() = default;
+		~push_buffer_vertex_info() = default;
 
-		u8 get_vertex_size_in_dwords(vertex_base_type type) const
-		{
-			//NOTE: Types are always provided to fit into 32-bits
-			//i.e no less than 4 8-bit values and no less than 2 16-bit values
+		u8 get_vertex_size_in_dwords() const;
+		u32 get_vertex_id() const;
 
-			switch (type)
-			{
-			case vertex_base_type::f:
-				return size;
-			case vertex_base_type::ub:
-			case vertex_base_type::ub256:
-				return 1;
-			case vertex_base_type::s1:
-			case vertex_base_type::s32k:
-				return size / 2;
-			default:
-				fmt::throw_exception("Unsupported vertex base type %d", static_cast(type));
-			}
-		}
-
-		void append_vertex_data(u32 sub_index, vertex_base_type type, u32 arg)
-		{
-			const u32 element_mask = (1 << sub_index);
-			const u8 vertex_size = get_vertex_size_in_dwords(type);
-
-			this->type = type;
-
-			if (attribute_mask & element_mask)
-			{
-				attribute_mask = 0;
-
-				vertex_count++;
-				data.resize(vertex_count * vertex_size);
-			}
-
-			attribute_mask |= element_mask;
-
-			u32* dst = data.data() + ((vertex_count - 1) * vertex_size) + sub_index;
-			*dst = arg;
-		}
+		void clear();
+		void set_vertex_data(u32 attribute_id, u32 vertex_id, u32 sub_index, vertex_base_type type, u32 size, u32 arg);
+		void pad_to(u32 required_vertex_count, bool skip_last);
 	};
 
 	struct register_vertex_data_info
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index d3f4c2122a..eda616cd9e 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -86,6 +86,7 @@
+
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index 551fa21f30..a682048b9b 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -1027,6 +1027,9 @@ Emu\Io
+
+      Emu\GPU\RSX
+