diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp index f05caed338..7b0a2689c2 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp @@ -258,7 +258,6 @@ s32 sys_rsx_context_attribute(s32 context_id, u32 package_id, u64 a3, u64 a4, u6 render->pause(); render->ctrl->get = a3; render->ctrl->put = a4; - render->internal_get = a3; render->restore_point = a3; render->unpause(); break; diff --git a/rpcs3/Emu/RSX/Capture/rsx_capture.cpp b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp index eb34bce640..6a0df50c34 100644 --- a/rpcs3/Emu/RSX/Capture/rsx_capture.cpp +++ b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "rsx_capture.h" #include "Emu/RSX/Common/BufferUtils.h" #include "Emu/RSX/Common/TextureUtils.h" @@ -175,15 +175,15 @@ namespace rsx const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size()); const u32 vertStride = info.stride(); - for (const auto& count : method_registers.current_draw_clause.first_count_commands) + for (const auto& range : method_registers.current_draw_clause.draw_command_ranges) { - const u32 vertCount = count.second; + const u32 vertCount = range.count; const size_t bufferSize = vertCount * vertStride + vertSize; frame_capture_data::memory_block block; block.ioOffset = base_address; block.location = memory_location; - block.offset = (count.first * vertStride); + block.offset = (range.first * vertStride); frame_capture_data::memory_block_data block_data; block_data.data.resize(bufferSize); std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize); @@ -211,10 +211,10 @@ namespace rsx const bool is_primitive_restart_enabled = method_registers.restart_index_enabled(); const u32 primitive_restart_index = method_registers.restart_index(); - for (const auto& count : method_registers.current_draw_clause.first_count_commands) + for (const auto& range : method_registers.current_draw_clause.draw_command_ranges) { 
- const u32 idxFirst = count.first; - const u32 idxCount = count.second; + const u32 idxFirst = range.first; + const u32 idxCount = range.count; const u32 idxAddr = base_addr + (idxFirst * type_size); const size_t bufferSize = idxCount * type_size; diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 71caafc79f..fdd28d9d95 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "BufferUtils.h" #include "../rsx_methods.h" #include "Utilities/sysinfo.h" @@ -435,11 +435,14 @@ namespace } } -void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness) +void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, const std::vector& first_count_commands, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness) { verify(HERE), (vector_element_count > 0); const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count); + // HACK! 
This is a legacy routine only used by D3D12 + const u32 count = first_count_commands.front().count; + bool use_stream_no_stride = false; bool use_stream_with_stride = false; @@ -796,13 +799,9 @@ namespace template std::tuple write_index_array_data_to_buffer_impl(gsl::span dst, gsl::span> src, - rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const std::vector > &first_count_arguments, + rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const rsx::draw_range_t &range, u32 base_index, std::function expands) { - u32 first; - u32 count; - std::tie(first, count) = get_first_count_from_draw_indexed_clause(first_count_arguments); - if (!expands(draw_mode)) return upload_untouched(src, dst, restart_index_enabled, restart_index, base_index); switch (draw_mode) @@ -810,7 +809,7 @@ namespace case rsx::primitive_type::line_loop: { const auto &returnvalue = upload_untouched(src, dst, restart_index_enabled, restart_index, base_index); - dst[count] = src[0]; + dst[range.count] = src[0]; return returnvalue; } case rsx::primitive_type::polygon: @@ -824,21 +823,54 @@ namespace } } -std::tuple write_index_array_data_to_buffer(gsl::span dst, - gsl::span src, - rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const std::vector > &first_count_arguments, +std::tuple write_index_array_data_to_buffer(gsl::span dst_ptr, + gsl::span src_ptr, + rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, + const std::vector &first_count_arguments, u32 base_index, std::function expands) { - switch (type) + u32 read = 0; + u32 written = 0; + u32 min_index = -1u; + u32 max_index = 0; + + const u32 type_size = get_index_type_size(type); + + for (const auto &range : first_count_arguments) { - case rsx::index_array_type::u16: - return write_index_array_data_to_buffer_impl(as_span_workaround(dst), - as_const_span>(src), draw_mode, 
restart_index_enabled, restart_index, first_count_arguments, base_index, expands); - case rsx::index_array_type::u32: - return write_index_array_data_to_buffer_impl(as_span_workaround(dst), - as_const_span>(src), draw_mode, restart_index_enabled, restart_index, first_count_arguments, base_index, expands); + auto src = src_ptr.subspan(range.command_data_offset, range.count * type_size); + auto dst = dst_ptr.subspan(written * type_size); + + switch (type) + { + case rsx::index_array_type::u16: + { + auto ret = write_index_array_data_to_buffer_impl(as_span_workaround(dst), + as_const_span>(src), draw_mode, restart_index_enabled, restart_index, range, base_index, expands); + + min_index = std::min(std::get<0>(ret), min_index); + max_index = std::max(std::get<1>(ret), max_index); + written += std::get<2>(ret); + break; + } + case rsx::index_array_type::u32: + { + auto ret = write_index_array_data_to_buffer_impl(as_span_workaround(dst), + as_const_span>(src), draw_mode, restart_index_enabled, restart_index, range, base_index, expands); + + min_index = std::min(std::get<0>(ret), min_index); + max_index = std::max(std::get<1>(ret), max_index); + written += std::get<2>(ret); + break; + } + default: + fmt::throw_exception("Unreachable" HERE); + } + + read += range.count; } - fmt::throw_exception("Unknown index type" HERE); + + return std::make_tuple(min_index, max_index, written); } void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index 01d98e6cd7..b34be9bce6 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include @@ -10,7 +10,7 @@ * Write count vertex attributes from src_ptr. * src_ptr array layout is deduced from the type, vector element count and src_stride arguments. 
*/ -void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness); +void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, const std::vector& first_count_commands, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness); /* * If primitive mode is not supported and need to be emulated (using an index buffer) returns false. @@ -33,7 +33,7 @@ u32 get_index_type_size(rsx::index_array_type type); * The function expands index buffer for non native primitive type if expands(draw_mode) return true. */ std::tuple write_index_array_data_to_buffer(gsl::span dst, gsl::span src, - rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const std::vector > &first_count_arguments, + rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const std::vector &first_count_arguments, u32 base_index, std::function expands); /** diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index d9c24ab073..8585b81214 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -1,4 +1,4 @@ -#ifdef _MSC_VER +#ifdef _MSC_VER #include "stdafx.h" #include "stdafx_d3d12.h" @@ -41,14 +41,6 @@ namespace fmt::throw_exception("Wrong vector size %d" HERE, size); } - u32 get_vertex_count(const std::vector >& first_count_commands) - { - u32 vertex_count = 0; - for (const auto &pair : first_count_commands) - vertex_count += pair.second; - return vertex_count; - } - D3D12_SHADER_RESOURCE_VIEW_DESC get_vertex_attribute_srv(const rsx::data_array_format_info &info, UINT64 offset_in_vertex_buffers_buffer, UINT buffer_size) { u32 element_size = rsx::get_vertex_type_size_on_host(info.type(), info.size()); @@ -166,7 +158,7 @@ 
namespace m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); gsl::span mapped_buffer_span = { (gsl::byte*)mapped_buffer, gsl::narrow_cast(buffer_size)}; - write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count, + write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, rsx::method_registers.current_draw_clause.draw_command_ranges, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size, vertex_array.is_be); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); @@ -219,11 +211,11 @@ namespace }; std::tuple generate_index_buffer_for_emulated_primitives_array( - const std::vector>& vertex_ranges, d3d12_data_heap& m_buffer_data) + const std::vector & vertex_ranges, d3d12_data_heap& m_buffer_data) { size_t index_count = std::accumulate( vertex_ranges.begin(), vertex_ranges.end(), 0ll, [](size_t acc, const auto& pair) { - return acc + get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second); + return acc + get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.count); }); // Alloc @@ -236,7 +228,7 @@ namespace u32 vertex_count = 0; for (const auto& pair : vertex_ranges) - vertex_count += pair.second; + vertex_count += pair.count; write_index_array_for_non_indexed_non_native_primitive_to_buffer((char *)mapped_buffer, rsx::method_registers.current_draw_clause.primitive, vertex_count); @@ -257,8 +249,8 @@ namespace * range, and whose second element is the number of vertex in this range. 
*/ std::vector upload_vertex_attributes( - const std::vector>& vertex_ranges, - std::function>)> + std::vector vertex_ranges, + std::function)> get_vertex_buffers, ID3D12Resource* m_vertex_buffer_data, d3d12_data_heap& m_buffer_data, ID3D12GraphicsCommandList* command_list) @@ -267,11 +259,13 @@ namespace &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST)); - u32 vertex_count = get_vertex_count(vertex_ranges); + u32 vertex_count = 0; + for (const auto &range : vertex_ranges) + vertex_count += range.count; vertex_buffer_visitor visitor( vertex_count, command_list, m_vertex_buffer_data, m_buffer_data); - const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, vertex_ranges); + const auto& vertex_buffers = get_vertex_buffers(vertex_ranges); for (const auto& vbo : vertex_buffers) std::visit(visitor, vbo); @@ -354,9 +348,7 @@ namespace { draw_command_visitor(ID3D12GraphicsCommandList* cmd_list, d3d12_data_heap& buffer_data, ID3D12Resource* vertex_buffer_data, - std::function>&)> - get_vertex_info_lambda) + std::function&)> get_vertex_info_lambda) : command_list(cmd_list), m_buffer_data(buffer_data), m_vertex_buffer_data(vertex_buffer_data), get_vertex_buffers(get_vertex_info_lambda) { @@ -366,9 +358,10 @@ namespace const rsx::draw_array_command& command) { if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { - size_t vertex_count = get_vertex_count(command.indexes_range); + size_t vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); return std::make_tuple(false, vertex_count, - upload_vertex_attributes(command.indexes_range, get_vertex_buffers, + upload_vertex_attributes(rsx::method_registers.current_draw_clause.draw_command_ranges, + get_vertex_buffers, m_vertex_buffer_data, m_buffer_data, command_list)); } @@ -376,10 +369,11 @@ namespace size_t index_count; std::tie(index_buffer_view, index_count) = 
generate_index_buffer_for_emulated_primitives_array( - command.indexes_range, m_buffer_data); + rsx::method_registers.current_draw_clause.draw_command_ranges, m_buffer_data); command_list->IASetIndexBuffer(&index_buffer_view); return std::make_tuple(true, index_count, - upload_vertex_attributes(command.indexes_range, get_vertex_buffers, + upload_vertex_attributes(rsx::method_registers.current_draw_clause.draw_command_ranges, + get_vertex_buffers, m_vertex_buffer_data, m_buffer_data, command_list)); } @@ -389,7 +383,7 @@ namespace // Index count size_t index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, - ::narrow(get_vertex_count(command.ranges_to_fetch_in_index_buffer))); + rsx::method_registers.current_draw_clause.get_elements_count()); rsx::index_array_type indexed_type = rsx::method_registers.current_draw_clause.is_immediate_draw? rsx::index_array_type::u32: @@ -412,7 +406,7 @@ namespace write_index_array_data_to_buffer(dst, command.raw_index_buffer, indexed_type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.restart_index_enabled(), - rsx::method_registers.restart_index(), command.ranges_to_fetch_in_index_buffer, + rsx::method_registers.restart_index(), rsx::method_registers.current_draw_clause.draw_command_ranges, rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !is_primitive_native(prim); }); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); @@ -423,7 +417,7 @@ namespace command_list->IASetIndexBuffer(&index_buffer_view); return std::make_tuple(true, index_count, - upload_vertex_attributes({std::make_pair(0, max_index + 1)}, get_vertex_buffers, + upload_vertex_attributes({ {0, max_index + 1} }, get_vertex_buffers, m_vertex_buffer_data, m_buffer_data, command_list)); } @@ -434,8 +428,8 @@ namespace std::vector vertex_buffer_view; std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array(rsx::method_registers.vertex_arrays_info, - {(const 
gsl::byte*)command.inline_vertex_array.data(), - ::narrow(command.inline_vertex_array.size() * sizeof(uint))}, + {(const gsl::byte*)rsx::method_registers.current_draw_clause.inline_vertex_array.data(), + ::narrow(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(uint))}, m_buffer_data, m_vertex_buffer_data, command_list); if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) @@ -453,9 +447,7 @@ namespace private: ID3D12GraphicsCommandList* command_list; d3d12_data_heap& m_buffer_data; - std::function>&)> - get_vertex_buffers; + std::function&)> get_vertex_buffers; ID3D12Resource* m_vertex_buffer_data; }; } // End anonymous namespace @@ -465,8 +457,7 @@ D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* comma { return std::visit( draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(), - [this]( - const auto& state, const auto& list) { return get_vertex_buffers(state, list, 0); }), + [this](const auto& list) { return get_vertex_buffers(rsx::method_registers, list, 0); }), get_draw_command(rsx::method_registers)); } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 71a39e7c5b..4c3bfa41df 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -500,7 +500,7 @@ void GLGSRender::end() const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive); const bool allow_multidraw = supports_multidraw && !g_cfg.video.disable_FIFO_reordering; const bool single_draw = (!allow_multidraw || - rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || + rsx::method_registers.current_draw_clause.draw_command_ranges.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive); if (upload_info.index_info) @@ -522,7 +522,7 @@ void GLGSRender::end() } else { - const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size(); + const 
auto draw_count = rsx::method_registers.current_draw_clause.draw_command_ranges.size(); const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2; uintptr_t index_ptr = index_offset; m_scratch_buffer.resize(draw_count * 16); @@ -531,9 +531,9 @@ void GLGSRender::end() const GLvoid** offsets = (const GLvoid**)(counts + draw_count); int dst_index = 0; - for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges) { - const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second); + const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count); counts[dst_index] = index_size; offsets[dst_index++] = (const GLvoid*)index_ptr; @@ -551,10 +551,10 @@ void GLGSRender::end() } else { - const u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; + const u32 base_index = rsx::method_registers.current_draw_clause.draw_command_ranges.front().first; bool use_draw_arrays_fallback = false; - const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size(); + const auto draw_count = rsx::method_registers.current_draw_clause.draw_command_ranges.size(); const auto driver_caps = gl::get_driver_caps(); m_scratch_buffer.resize(draw_count * 24); @@ -563,10 +563,10 @@ void GLGSRender::end() const GLvoid** offsets = (const GLvoid**)(counts + draw_count); int dst_index = 0; - for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges) { const GLint first = range.first - base_index; - const GLsizei count = range.second; + const GLsizei count = range.count; firsts[dst_index] = first; counts[dst_index] = count; @@ -583,9 +583,9 @@ void GLGSRender::end() if (use_draw_arrays_fallback) { 
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more - for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges) { - glDrawArrays(draw_mode, range.first - base_index, range.second); + glDrawArrays(draw_mode, range.first - base_index, range.count); } } else if (driver_caps.vendor_AMD) diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 0346309e1f..6e6e31d77f 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -20,17 +20,17 @@ namespace namespace { // return vertex count if primitive type is not native (empty array otherwise) - std::tuple get_index_array_for_emulated_non_indexed_draw(const std::vector> &first_count_commands, rsx::primitive_type primitive_mode, gl::ring_buffer &dst) + std::tuple get_index_array_for_emulated_non_indexed_draw(const std::vector &first_count_commands, rsx::primitive_type primitive_mode, gl::ring_buffer &dst) { //This is an emulated buffer, so our indices only range from 0->original_vertex_array_length u32 vertex_count = 0; u32 element_count = 0; verify(HERE), !gl::is_primitive_native(primitive_mode); - for (const auto &pair : first_count_commands) + for (const auto &range : first_count_commands) { - element_count += (u32)get_index_count(primitive_mode, pair.second); - vertex_count += pair.second; + element_count += (u32)get_index_count(primitive_mode, range.count); + vertex_count += range.count; } auto mapping = dst.alloc_from_heap(element_count * sizeof(u16), 256); @@ -40,7 +40,7 @@ namespace return std::make_tuple(element_count, mapping.second); } - std::tuple upload_index_buffer(gsl::span raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, const std::vector>& first_count_commands, u32 initial_vertex_count) + std::tuple 
upload_index_buffer(gsl::span raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, const std::vector& first_count_commands, u32 initial_vertex_count) { u32 min_index, max_index, vertex_draw_count = initial_vertex_count; @@ -92,14 +92,14 @@ namespace vertex_input_state operator()(const rsx::draw_array_command& command) { const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); - const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; + const u32 min_index = rsx::method_registers.current_draw_clause.min_index(); if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { u32 index_count; u32 offset_in_index_buffer; std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( - rsx::method_registers.current_draw_clause.first_count_commands, + rsx::method_registers.current_draw_clause.draw_command_ranges, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); return{ index_count, vertex_count, min_index, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) }; @@ -129,7 +129,7 @@ namespace std::tie(min_index, max_index, index_count) = upload_index_buffer( command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, - rsx::method_registers.current_draw_clause.first_count_commands, vertex_count); + rsx::method_registers.current_draw_clause.draw_command_ranges, vertex_count); if (min_index >= max_index) { @@ -155,14 +155,15 @@ namespace vertex_input_state operator()(const rsx::draw_inlined_array& command) { - const u32 vertex_count = (u32)(command.inline_vertex_array.size() * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride; + const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size(); + const u32 vertex_count = u32(stream_length * sizeof(u32)) / 
m_vertex_layout.interleaved_blocks[0].attribute_stride; if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { u32 offset_in_index_buffer; u32 index_count; std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( - { std::make_pair(0, vertex_count) }, + { { 0, 0, vertex_count } }, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); return{ index_count, vertex_count, 0, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) }; diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp new file mode 100644 index 0000000000..3c3e9f34b7 --- /dev/null +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -0,0 +1,690 @@ +#include "stdafx.h" + +#include "RSXFIFO.h" +#include "RSXThread.h" +#include "Capture/rsx_capture.h" + +extern rsx::frame_capture_data frame_capture; +//#pragma optimize("", off) +#define ENABLE_OPTIMIZATION_DEBUGGING 0 + +namespace rsx +{ + namespace FIFO + { + FIFO_control::FIFO_control(::rsx::thread* pctrl) + { + m_ctrl = pctrl->ctrl; + } + + bool FIFO_control::is_blocking_cmd(u32 cmd) + { + switch (cmd) + { + case NV4097_WAIT_FOR_IDLE: + case NV406E_SEMAPHORE_ACQUIRE: + case NV406E_SEMAPHORE_RELEASE: + case NV3089_IMAGE_IN: + case NV0039_BUFFER_NOTIFY: + return false; + default: + return true; + } + } + + bool FIFO_control::is_sync_cmd(u32 cmd) + { + switch (cmd) + { + case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: + case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: + case NV406E_SEMAPHORE_RELEASE: + case NV406E_SET_REFERENCE: + return true; + default: +return false; + } + } + + void FIFO_control::register_optimization_pass(optimization_pass* pass) + { + m_optimization_passes.emplace_back(pass); + } + + void FIFO_control::clear_buffer() + { + m_queue.clear(); + m_command_index = 0; + } + + void FIFO_control::read_ahead() + { + m_internal_get = m_ctrl->get; + + while (true) + { + const u32 get = m_ctrl->get; + const u32 put = m_ctrl->put; + + if (get == put) + { + break; 
+ } + + // Validate put and get registers before reading the command + // TODO: Who should handle graphics exceptions?? + u32 cmd; + + if (u32 addr = RSXIOMem.RealAddr(get)) + { + cmd = vm::read32(addr); + } + else + { + // TODO: Optional recovery + break; + } + + if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD || + (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD || + (cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD || + (cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) + { + // Flow control, stop read ahead + m_queue.push_back({ cmd, 0, m_internal_get }); + break; + } + + if ((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD) + { + if (m_queue.empty() || m_queue.back().reg) + { + // Insert one NOP only + m_queue.push_back({ cmd, 0, m_internal_get }); + } + + verify(HERE), m_ctrl->get == get; + m_ctrl->get = m_internal_get = get + 4; + continue; + } + + if (cmd & 0x3) + { + // Malformed command, optional recovery + break; + } + + u32 count = (cmd >> 18) & 0x7ff; + + //Validate the args ptr if the command attempts to read from it + auto args = vm::ptr::make(RSXIOMem.RealAddr(get + 4)); + + if (!args && count) + { + // Optional recovery + break; + } + + // Stop command execution if put will be equal to get ptr during the execution itself + if (count * 4 + 4 > put - get) + { + count = (put - get) / 4 - 1; + } + + if (count > 1) + { + // Queue packet header + m_queue.push_back({ FIFO_PACKET_BEGIN, count, m_internal_get }); + + const bool no_increment = (cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD; + u32 reg = cmd & 0xfffc; + m_internal_get += 4; // First executed command is at data[0] + + for (u32 i = 0; i < count; i++, m_internal_get += 4) + { + m_queue.push_back({ reg, args[i], m_internal_get }); + + if (!no_increment) reg += 4; + } + } + else + { + m_queue.push_back({ cmd & 0xfffc, args[0], m_internal_get }); + m_internal_get += 8; + } + + verify(HERE), m_ctrl->get == get; + m_ctrl->get = m_internal_get; + 
} + } + + void FIFO_control::optimize() + { + if (m_queue.empty()) + { + // Nothing to do + return; + } + + for (auto &opt : m_optimization_passes) + { + opt->optimize(m_queue, rsx::method_registers.registers.data()); + } + } + + void FIFO_control::set_put(u32 put) + { + if (m_ctrl->put == put) + { + return; + } + + m_ctrl->put = put; + } + + void FIFO_control::set_get(u32 get) + { + if (m_ctrl->get == get) + { + return; + } + + clear_buffer(); + m_ctrl->get = get; + } + + register_pair FIFO_control::read() + { + if (!m_queue.empty() && m_internal_get != m_ctrl->get) + { + // Control register changed + clear_buffer(); + } + + if (m_command_index && m_command_index >= m_queue.size()) + { + // Whole queue consumed + verify(HERE), !m_queue.empty(); + clear_buffer(); + } + + if (m_queue.empty()) + { + // Empty queue, read ahead + read_ahead(); + optimize(); + } + + if (!m_queue.empty()) + { + verify(HERE), m_command_index < m_queue.size(); + return m_queue[m_command_index++]; + } + + return { FIFO_EMPTY, 0 }; + } + + // Optimization passes + void flattening_pass::optimize(std::vector& commands, const u32* registers) const + { +#if (ENABLE_OPTIMIZATION_DEBUGGING) + auto copy = commands; +#endif + // Removes commands that have no effect on the pipeline + + register_pair* last_begin = nullptr; + register_pair* last_end = nullptr; + + u32 deferred_primitive_type = UINT32_MAX; + bool has_deferred_call = false; + + + + std::unordered_map register_tracker; // Tracks future register writes + auto test_register = [&](u32 reg, u32 value) + { + u32 test; + auto found = register_tracker.find(reg); + if (found == register_tracker.end()) + { + test = registers[reg]; + } + else + { + test = found->second; + } + + return (value == test); + }; + + auto set_register = [&](u32 reg, u32 value) + { + register_tracker[reg] = value; + }; + + auto patch_draw_calls = [&]() + { + if (last_end) + { + // Restore scope end + last_end->reg = (NV4097_SET_BEGIN_END << 2); + } + + if (last_begin > 
last_end) + { + // Dangling clause, restore scope open + last_begin->reg = (NV4097_SET_BEGIN_END << 2); + } + }; + + for (auto &command : commands) + { + //LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(command.reg), command.value); + + bool flush_commands_flag = has_deferred_call; + bool execute_method_flag = true; + + const auto reg = command.reg >> 2; + const auto value = command.value; + switch (reg) + { + case NV4097_SET_BEGIN_END: + { + if (value && value != deferred_primitive_type) + { + // Begin call with different primitive type + deferred_primitive_type = value; + } + else + { + // This is either an End call or another Begin with the same primitive type + has_deferred_call = true; + flush_commands_flag = false; + execute_method_flag = false; + } + + break; + } + case NV4097_DRAW_ARRAYS: + { + const auto cmd = method_registers.current_draw_clause.command; + if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none) + break; + + flush_commands_flag = false; + break; + } + case NV4097_DRAW_INDEX_ARRAY: + { + const auto cmd = method_registers.current_draw_clause.command; + if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none) + break; + + flush_commands_flag = false; + break; + } + default: + { + // TODO: Reorder draw commands between synchronization events to maximize batched sizes + static const std::pair skippable_ranges[] = + { + // Texture configuration + { NV4097_SET_TEXTURE_OFFSET, 8 * 16 }, + { NV4097_SET_TEXTURE_CONTROL2, 16 }, + { NV4097_SET_TEXTURE_CONTROL3, 16 }, + { NV4097_SET_VERTEX_TEXTURE_OFFSET, 8 * 4 }, + // Surface configuration + { NV4097_SET_SURFACE_CLIP_HORIZONTAL, 1 }, + { NV4097_SET_SURFACE_CLIP_VERTICAL, 1 }, + { NV4097_SET_SURFACE_COLOR_AOFFSET, 1 }, + { NV4097_SET_SURFACE_COLOR_BOFFSET, 1 }, + { NV4097_SET_SURFACE_COLOR_COFFSET, 1 }, + { NV4097_SET_SURFACE_COLOR_DOFFSET, 1 }, + { NV4097_SET_SURFACE_ZETA_OFFSET, 1 }, + { NV4097_SET_CONTEXT_DMA_COLOR_A, 1 }, + { 
NV4097_SET_CONTEXT_DMA_COLOR_B, 1 }, + { NV4097_SET_CONTEXT_DMA_COLOR_C, 1 }, + { NV4097_SET_CONTEXT_DMA_COLOR_D, 1 }, + { NV4097_SET_CONTEXT_DMA_ZETA, 1 }, + { NV4097_SET_SURFACE_FORMAT, 1 }, + { NV4097_SET_SURFACE_PITCH_A, 1 }, + { NV4097_SET_SURFACE_PITCH_B, 1 }, + { NV4097_SET_SURFACE_PITCH_C, 1 }, + { NV4097_SET_SURFACE_PITCH_D, 1 }, + { NV4097_SET_SURFACE_PITCH_Z, 1 }, + // Program configuration + { NV4097_SET_TRANSFORM_PROGRAM_START, 1 }, + { NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, 1 }, + { NV4097_SET_TRANSFORM_PROGRAM, 512 } + }; + + if (has_deferred_call) + { + // Hopefully this is skippable so the batch can keep growing + for (const auto &method : skippable_ranges) + { + if (reg < method.first) + continue; + + if (reg - method.first < method.second) + { + // Safe to ignore if value has not changed + if (test_register(reg, value)) + { + execute_method_flag = false; + flush_commands_flag = false; + } + else + { + set_register(reg, value); + } + + break; + } + } + } + break; + } + } + + if (!execute_method_flag) + { + command.reg = FIFO_DISABLED_COMMAND; + + if (reg == NV4097_SET_BEGIN_END) + { + if (command.value) + { + last_begin = &command; + } + else + { + last_end = &command; + } + } + } + + if (flush_commands_flag) + { + has_deferred_call = false; + deferred_primitive_type = UINT32_MAX; + + patch_draw_calls(); + } + } + + if (has_deferred_call) + { + verify(HERE), deferred_primitive_type != UINT32_MAX; + patch_draw_calls(); + } + +#if (ENABLE_OPTIMIZATION_DEBUGGING) + + bool mismatch = false; + for (int n = 0; n < commands.size(); ++n) + { + auto command = commands[n]; + auto old = copy[n]; + + if (command.reg != old.reg) + { + if (old.reg == (NV4097_SET_BEGIN_END << 2) && old.value) + { + mismatch = true; + break; + } + } + } + + if (!mismatch) + { + return; + } + + auto _get_method_name = [&](u32 reg) -> std::string + { + if (reg == FIFO_DISABLED_COMMAND) + { + return "COMMAND DISABLED"; + } + + if (reg == FIFO_PACKET_BEGIN) + { + return "PACKET 
BEGIN"; + } + + return rsx::get_method_name(reg >> 2); + }; + + LOG_ERROR(RSX, "------------------- DUMP BEGINS--------------------"); + for (int n = 0; n < commands.size(); ++n) + { + auto command = commands[n]; + auto old = copy[n]; + + if (old.reg != command.reg || command.value != command.value) + { + LOG_ERROR(RSX, "[0x%x] %s(0x%x) -> %s(0x%x)", command.loc, _get_method_name(old.reg), old.value, _get_method_name(command.reg), command.value); + } + else + { + LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(old.reg), old.value); + } + } + LOG_ERROR(RSX, "------------------- DUMP ENDS--------------------"); +#endif + } + + void reordering_pass::optimize(std::vector& commands, const u32* registers) const + { +#if 0 + // Define a draw call + struct texture_entry + { + u32 index = -1u; + u32 address = 0; + u32 filter = 0; + u32 control0 = 0; + u32 control1 = 0; + u32 control2 = 0; + u32 control3 = 0; + }; + + struct draw_call + { + std::vector instructions; + std::array fragment_texture_state{}; + std::array vertex_texture_state{}; + }; + + std::vector draw_calls; +#endif + } + } + + void thread::run_FIFO() + { + auto command = fifo_ctrl->read(); + const auto cmd = command.reg; + + if (cmd == FIFO::FIFO_EMPTY || !Emu.IsRunning()) + { + if (performance_counters.state == FIFO_state::running) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + performance_counters.state = FIFO_state::empty; + } + + return; + } + + // Validate put and get registers before reading the command + // TODO: Who should handle graphics exceptions?? + if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) + { + u32 offs = cmd & 0x1ffffffc; + if (offs == command.loc) + { + //Jump to self. 
Often preceded by NOP + if (performance_counters.state == FIFO_state::running) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + } + + performance_counters.state = FIFO_state::spinning; + } + + //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); + fifo_ctrl->set_get(offs); + return; + } + if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) + { + u32 offs = cmd & 0xfffffffc; + if (offs == command.loc) + { + //Jump to self. Often preceded by NOP + if (performance_counters.state == FIFO_state::running) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + } + + performance_counters.state = FIFO_state::spinning; + } + + //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); + fifo_ctrl->set_get(offs); + return; + } + if ((cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD) + { + if (m_return_addr != -1) + { + // Only one layer is allowed in the call stack. + LOG_ERROR(RSX, "FIFO: CALL found inside a subroutine. Discarding subroutine"); + fifo_ctrl->set_get(std::exchange(m_return_addr, -1)); + return; + } + + u32 offs = cmd & 0xfffffffc; + //LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get); + m_return_addr = command.loc + 4; + fifo_ctrl->set_get(offs); + return; + } + if ((cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) + { + if (m_return_addr == -1) + { + LOG_ERROR(RSX, "FIFO: RET found without corresponding CALL. 
Discarding queue"); + fifo_ctrl->set_get(ctrl->put); + return; + } + + //LOG_WARNING(RSX, "rsx return(0x%x)", get); + fifo_ctrl->set_get(m_return_addr); + m_return_addr = -1; + return; + } + if ((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD) + { + if (performance_counters.state == FIFO_state::running) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + performance_counters.state = FIFO_state::nop; + } + + return; + } + if (cmd & 0x3) + { + // TODO: Check for more invalid bits combinations + LOG_ERROR(RSX, "FIFO: Illegal command(0x%x) was executed. Resetting...", cmd); + fifo_ctrl->set_get(restore_point.load()); + m_return_addr = restore_ret_addr; + return; + } + + if (performance_counters.state != FIFO_state::running) + { + //Update performance counters with time spent in idle mode + performance_counters.idle_time += (get_system_time() - performance_counters.FIFO_idle_timestamp); + + if (performance_counters.state == FIFO_state::spinning) + { + //TODO: Properly simulate FIFO wake delay. + //NOTE: The typical spin setup is a NOP followed by a jump-to-self + //NOTE: There is a small delay when the jump address is dynamically edited by cell + busy_wait(3000); + } + + performance_counters.state = FIFO_state::running; + } + + u32 count = 1; + if (cmd == FIFO::FIFO_PACKET_BEGIN) + { + count = command.value; + command = fifo_ctrl->read(); + } + + for (u32 i = 0; i < count; ++i) + { + if (i) command = fifo_ctrl->read(); + + const u32 reg = command.reg >> 2; + const u32 value = command.value; + + if (capture_current_frame) + { + frame_debug.command_queue.push_back(std::make_pair(reg, value)); + + if (!(reg == NV406E_SET_REFERENCE || reg == NV406E_SEMAPHORE_RELEASE || reg == NV406E_SEMAPHORE_ACQUIRE)) + { + // todo: handle nv406e methods better?, do we care about call/jumps? + rsx::frame_capture_data::replay_command replay_cmd; + replay_cmd.rsx_command = std::make_pair(i == 0 ? 
cmd : 0, value); + + frame_capture.replay_commands.push_back(replay_cmd); + + // to make this easier, use the replay command 'i' positions back + auto it = std::prev(frame_capture.replay_commands.end(), i + 1); + + switch (reg) + { + case NV4097_GET_REPORT: + capture::capture_get_report(this, *it, value); + break; + case NV3089_IMAGE_IN: + capture::capture_image_in(this, *it); + break; + case NV0039_BUFFER_NOTIFY: + capture::capture_buffer_notify(this, *it); + break; + case NV4097_CLEAR_SURFACE: + capture::capture_surface_state(this, *it); + break; + default: + if (reg >= NV308A_COLOR && reg < NV3089_SET_OBJECT) + capture::capture_inline_transfer(this, *it, reg - NV308A_COLOR, value); + break; + } + } + } + + if (command.reg == FIFO::FIFO_DISABLED_COMMAND) + { + // Placeholder for dropped commands + continue; + } + + method_registers.decode(reg, value); + + if (auto method = methods[reg]) + { + method(this, reg, value); + } + } + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXFIFO.h b/rpcs3/Emu/RSX/RSXFIFO.h new file mode 100644 index 0000000000..1cd06b9e6f --- /dev/null +++ b/rpcs3/Emu/RSX/RSXFIFO.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +#include + +struct RsxDmaControl; + +namespace rsx +{ + class thread; + + namespace FIFO + { + enum internal_commands : u32 + { + NOP = 0, + FIFO_EMPTY = 0xDEADF1F0, + FIFO_PACKET_BEGIN = 0xF1F0, + FIFO_DISABLED_COMMAND = 0xF1F4, + }; + + struct register_pair + { + u32 reg; + u32 value; + u32 loc; + }; + + struct optimization_pass + { + virtual void optimize(std::vector& commands, const u32* registers) const = 0; + }; + + struct flattening_pass : public optimization_pass + { + void optimize(std::vector& commands, const u32* registers) const override; + }; + + struct reordering_pass : public optimization_pass + { + void optimize(std::vector& commands, const u32* registers) const override; + }; + + class FIFO_control + { + RsxDmaControl* m_ctrl = nullptr; + u32 m_internal_get = 0; + + std::vector> 
m_optimization_passes; + + std::vector m_queue; + atomic_t m_command_index{ 0 }; + + bool is_blocking_cmd(u32 cmd); + bool is_sync_cmd(u32 cmd); + + void read_ahead(); + void optimize(); + void clear_buffer(); + + public: + FIFO_control(rsx::thread* pctrl); + ~FIFO_control() {} + + void set_get(u32 get); + void set_put(u32 put); + + register_pair read(); + + void register_optimization_pass(optimization_pass* pass); + }; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 4659f86252..8ee272ab28 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -428,6 +428,12 @@ namespace rsx zcull_ctrl = std::make_unique<::rsx::reports::ZCULL_control>(); } + fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this); + + fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); + //fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); + //fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); + last_flip_time = get_system_time() - 1000000; named_thread vblank_thread("VBlank Thread", [this]() @@ -509,515 +515,24 @@ namespace rsx // Round to nearest to deal with forward/reverse scaling fesetround(FE_TONEAREST); - // Deferred calls are used to batch draws together - u32 deferred_primitive_type = 0; - u32 deferred_call_size = 0; - s32 deferred_begin_end = 0; - std::vector deferred_stack; - bool has_deferred_call = false; - - auto flush_command_queue = [&]() - { - const auto num_draws = (u32)method_registers.current_draw_clause.first_count_commands.size(); - bool emit_begin = false; - bool emit_end = true; - - if (num_draws > 1) - { - auto& first_counts = method_registers.current_draw_clause.first_count_commands; - deferred_stack.resize(0); - - u32 last = first_counts.front().first; - u32 last_index = 0; - - for (u32 draw = 0; draw < num_draws; draw++) - { - if (first_counts[draw].first != last) - { - //Disjoint - deferred_stack.push_back(draw); - } - - last = 
first_counts[draw].first + first_counts[draw].second; - } - - if (deferred_stack.size() > 0) - { - LOG_TRACE(RSX, "Disjoint draw range detected"); - - deferred_stack.push_back(num_draws); //Append last pair - std::vector> temp_range = first_counts; - auto current_command = rsx::method_registers.current_draw_clause.command; - - u32 last_index = 0; - - for (const u32 draw : deferred_stack) - { - if (emit_begin) - methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type); - else - emit_begin = true; - - //NOTE: These values are reset if begin command is emitted - first_counts.resize(draw - last_index); - std::copy(temp_range.begin() + last_index, temp_range.begin() + draw, first_counts.begin()); - rsx::method_registers.current_draw_clause.command = current_command; - - methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); - last_index = draw; - } - - emit_end = false; - } - } - - if (emit_end) - methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); - - if (deferred_begin_end > 0) //Hanging draw call (useful for immediate rendering where the begin call needs to be noted) - methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type); - - deferred_begin_end = 0; - deferred_primitive_type = 0; - deferred_call_size = 0; - has_deferred_call = false; - }; - // TODO: exit condition while (!Emu.IsStopped()) { - //Wait for external pause events + // Wait for external pause events if (external_interrupt_lock.load()) { external_interrupt_ack.store(true); while (external_interrupt_lock.load()) _mm_pause(); } - //Execute backend-local tasks first + // Execute backend-local tasks first do_local_task(performance_counters.state); - //Update sub-units + // Update sub-units zcull_ctrl->update(this); - //Set up restore state if needed - if (sync_point_request) - { - if (RSXIOMem.RealAddr(internal_get)) - { - //New internal get is valid, use it - restore_point = internal_get.load(); - restore_ret_addr = m_return_addr; - } - 
else - { - LOG_ERROR(RSX, "Could not update FIFO restore point"); - } - - sync_point_request = false; - } - else if (performance_counters.state != FIFO_state::running) - { - if (performance_counters.state != FIFO_state::nop) - { - if (has_deferred_call) - { - //Flush if spinning or queue is empty - flush_command_queue(); - } - else if (zcull_ctrl->has_pending()) - { - //zcull_ctrl->sync(this); - } - else - { - //do_internal_task(); - } - } - } - - //Now load the FIFO ctrl registers - ctrl->get.store(internal_get.load()); - const u32 put = ctrl->put; - - if (put == internal_get || !Emu.IsRunning()) - { - if (performance_counters.state == FIFO_state::running) - { - performance_counters.FIFO_idle_timestamp = get_system_time(); - performance_counters.state = FIFO_state::empty; - } - - continue; - } - - // Validate put and get registers before reading the command - // TODO: Who should handle graphics exceptions?? - u32 cmd; - - if (u32 addr = RSXIOMem.RealAddr(internal_get)) - { - cmd = vm::read32(addr); - } - else - { - std::this_thread::sleep_for(33ms); - - if (!RSXIOMem.RealAddr(internal_get)) - { - LOG_ERROR(RSX, "Invalid FIFO queue get/put registers found: get=0x%X, put=0x%X; Resetting...", +internal_get, put); - internal_get = restore_point.load(); - m_return_addr = restore_ret_addr; - } - - continue; - } - - if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) - { - u32 offs = cmd & 0x1ffffffc; - if (offs == internal_get.load()) - { - //Jump to self. 
Often preceded by NOP - if (performance_counters.state == FIFO_state::running) - { - performance_counters.FIFO_idle_timestamp = get_system_time(); - } - - performance_counters.state = FIFO_state::spinning; - } - - //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); - internal_get = offs; - continue; - } - if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) - { - u32 offs = cmd & 0xfffffffc; - if (offs == internal_get.load()) - { - //Jump to self. Often preceded by NOP - if (performance_counters.state == FIFO_state::running) - { - performance_counters.FIFO_idle_timestamp = get_system_time(); - } - - performance_counters.state = FIFO_state::spinning; - } - - //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); - internal_get = offs; - continue; - } - if ((cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD) - { - if (m_return_addr != -1) - { - // Only one layer is allowed in the call stack. - LOG_ERROR(RSX, "FIFO: CALL found inside a subroutine. Discarding subroutine"); - internal_get = std::exchange(m_return_addr, -1); - continue; - } - u32 offs = cmd & ~3; - //LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get); - m_return_addr = std::exchange(internal_get.raw(), offs) + 4; - continue; - } - - if (cmd & 0x3) - { - // TODO: Check for more invalid bits combinations - LOG_ERROR(RSX, "FIFO: Illegal command(0x%x) was executed. Resetting...", cmd); - internal_get = restore_point.load(); - m_return_addr = restore_ret_addr; - continue; - } - - if ((cmd & ~0xfffc) == RSX_METHOD_RETURN_CMD) - { - if (m_return_addr == -1) - { - LOG_ERROR(RSX, "FIFO: RET found without corresponding CALL. 
Discarding queue"); - internal_get = put; - continue; - } - - u32 get = std::exchange(m_return_addr, -1); - //LOG_WARNING(RSX, "rsx return(0x%x)", get); - internal_get = get; - continue; - } - - u32 count = (cmd >> 18) & 0x7ff; - - if (count == 0) //nop - { - if (performance_counters.state == FIFO_state::running) - { - performance_counters.FIFO_idle_timestamp = get_system_time(); - performance_counters.state = FIFO_state::nop; - } - - internal_get += 4; - continue; - } - - //Validate the args ptr if the command attempts to read from it - auto args = vm::ptr::make(RSXIOMem.RealAddr(internal_get + 4)); - - if (!args) - { - std::this_thread::sleep_for(33ms); - - if (!RSXIOMem.RealAddr(internal_get + 4)) - { - LOG_ERROR(RSX, "Invalid FIFO queue args ptr found: get=0x%X, put=0x%X, count=%d; Resetting...", +internal_get, put, count); - internal_get = restore_point.load(); - m_return_addr = restore_ret_addr; - } - - continue; - } - - u32 first_cmd = (cmd & 0xfffc) >> 2; - - // Stop command execution if put will be equal to get ptr during the execution itself - if (count * 4 + 4 > put - internal_get) - { - count = (put - internal_get) / 4 - 1; - } - - if (performance_counters.state != FIFO_state::running) - { - //Update performance counters with time spent in idle mode - performance_counters.idle_time += (get_system_time() - performance_counters.FIFO_idle_timestamp); - - if (performance_counters.state == FIFO_state::spinning) - { - //TODO: Properly simulate FIFO wake delay. - //NOTE: The typical spin setup is a NOP followed by a jump-to-self - //NOTE: There is a small delay when the jump address is dynamically edited by cell - busy_wait(3000); - } - - performance_counters.state = FIFO_state::running; - } - - for (u32 i = 0; i < count; i++) - { - u32 reg = ((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD) ? 
first_cmd : first_cmd + i; - u32 value = args[i]; - - bool execute_method_call = true; - - //TODO: Flatten draw calls when multidraw is not supported to simplify checking in the end() methods - if (supports_multidraw && !g_cfg.video.disable_FIFO_reordering && !capture_current_frame) - { - //TODO: Make this cleaner - bool flush_commands_flag = has_deferred_call; - - switch (reg) - { - case NV4097_SET_BEGIN_END: - { - // Hook; Allows begin to go through, but ignores end - if (value) - deferred_begin_end++; - else - deferred_begin_end--; - - if (value && value != deferred_primitive_type) - deferred_primitive_type = value; - else - { - has_deferred_call = true; - flush_commands_flag = false; - execute_method_call = false; - - deferred_call_size++; - - if (!method_registers.current_draw_clause.is_disjoint_primitive) - { - // Combine all calls since the last one - auto &first_count = method_registers.current_draw_clause.first_count_commands; - if (first_count.size() > deferred_call_size) - { - const auto &batch_first_count = first_count[deferred_call_size - 1]; - u32 count = batch_first_count.second; - u32 next = batch_first_count.first + count; - - for (int n = deferred_call_size; n < first_count.size(); n++) - { - if (first_count[n].first != next) - { - LOG_ERROR(RSX, "Non-continuous first-count range passed as one draw; will be split."); - - first_count[deferred_call_size - 1].second = count; - deferred_call_size++; - - count = first_count[deferred_call_size - 1].second; - next = first_count[deferred_call_size - 1].first + count; - continue; - } - - count += first_count[n].second; - next += first_count[n].second; - } - - first_count[deferred_call_size - 1].second = count; - first_count.resize(deferred_call_size); - } - } - } - - break; - } - // These commands do not alter the pipeline state and deferred calls can still be active - // TODO: Add more commands here - case NV4097_INVALIDATE_VERTEX_FILE: - flush_commands_flag = false; - break; - case NV4097_DRAW_ARRAYS: - 
{ - const auto cmd = method_registers.current_draw_clause.command; - if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none) - break; - - flush_commands_flag = false; - break; - } - case NV4097_DRAW_INDEX_ARRAY: - { - const auto cmd = method_registers.current_draw_clause.command; - if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none) - break; - - flush_commands_flag = false; - break; - } - default: - { - // TODO: Reorder draw commands between synchronization events to maximize batched sizes - static const std::pair skippable_ranges[] = - { - // Texture configuration - { NV4097_SET_TEXTURE_OFFSET, 8 * 16 }, - { NV4097_SET_TEXTURE_CONTROL2, 16 }, - { NV4097_SET_TEXTURE_CONTROL3, 16 }, - { NV4097_SET_VERTEX_TEXTURE_OFFSET, 8 * 4 }, - // Surface configuration - { NV4097_SET_SURFACE_CLIP_HORIZONTAL, 1 }, - { NV4097_SET_SURFACE_CLIP_VERTICAL, 1 }, - { NV4097_SET_SURFACE_COLOR_AOFFSET, 1 }, - { NV4097_SET_SURFACE_COLOR_BOFFSET, 1 }, - { NV4097_SET_SURFACE_COLOR_COFFSET, 1 }, - { NV4097_SET_SURFACE_COLOR_DOFFSET, 1 }, - { NV4097_SET_SURFACE_ZETA_OFFSET, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_A, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_B, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_C, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_D, 1 }, - { NV4097_SET_CONTEXT_DMA_ZETA, 1 }, - { NV4097_SET_SURFACE_FORMAT, 1 }, - { NV4097_SET_SURFACE_PITCH_A, 1 }, - { NV4097_SET_SURFACE_PITCH_B, 1 }, - { NV4097_SET_SURFACE_PITCH_C, 1 }, - { NV4097_SET_SURFACE_PITCH_D, 1 }, - { NV4097_SET_SURFACE_PITCH_Z, 1 }, - // Program configuration - { NV4097_SET_TRANSFORM_PROGRAM_START, 1 }, - { NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, 1 }, - { NV4097_SET_TRANSFORM_PROGRAM, 512 } - }; - - if (has_deferred_call) - { - //Hopefully this is skippable so the batch can keep growing - for (const auto &method : skippable_ranges) - { - if (reg < method.first) - continue; - - if (reg - method.first < method.second) - { - //Safe to ignore if value has not changed - if (method_registers.test(reg, 
value)) - { - execute_method_call = false; - flush_commands_flag = false; - } - - break; - } - } - } - - break; - } - } - - if (flush_commands_flag) - { - flush_command_queue(); - } - } - - if (capture_current_frame) - { - frame_debug.command_queue.push_back(std::make_pair(reg, value)); - - if (!(reg == NV406E_SET_REFERENCE || reg == NV406E_SEMAPHORE_RELEASE || reg == NV406E_SEMAPHORE_ACQUIRE)) - { - // todo: handle nv406e methods better?, do we care about call/jumps? - rsx::frame_capture_data::replay_command replay_cmd; - replay_cmd.rsx_command = std::make_pair(i == 0 ? cmd : 0, value); - - frame_capture.replay_commands.push_back(replay_cmd); - - // to make this easier, use the replay command 'i' positions back - auto it = std::prev(frame_capture.replay_commands.end(), i + 1); - - switch (reg) - { - case NV4097_GET_REPORT: - capture::capture_get_report(this, *it, value); - break; - case NV3089_IMAGE_IN: - capture::capture_image_in(this, *it); - break; - case NV0039_BUFFER_NOTIFY: - capture::capture_buffer_notify(this, *it); - break; - case NV4097_CLEAR_SURFACE: - capture::capture_surface_state(this, *it); - break; - default: - if (reg >= NV308A_COLOR && reg < NV3089_SET_OBJECT) - capture::capture_inline_transfer(this, *it, reg - NV308A_COLOR, value); - break; - } - } - } - - method_registers.decode(reg, value); - - if (execute_method_call) - { - if (auto method = methods[reg]) - { - method(this, reg, value); - } - } - - if (invalid_command_interrupt_raised) - { - invalid_command_interrupt_raised = false; - - //Skip the rest of this command - break; - } - } - - internal_get += (count + 1) * 4; + // Execite FIFO queue + run_FIFO(); } } @@ -1225,7 +740,7 @@ namespace rsx return t + timestamp_subvalue; } - gsl::span thread::get_raw_index_array(const std::vector >& draw_indexed_clause) const + gsl::span thread::get_raw_index_array(const std::vector& draw_indexed_clause) const { if (element_push_buffer.size()) { @@ -1240,43 +755,49 @@ namespace rsx bool 
is_primitive_restart_enabled = rsx::method_registers.restart_index_enabled(); u32 primitive_restart_index = rsx::method_registers.restart_index(); - // Disjoint first_counts ranges not supported atm - for (int i = 0; i < draw_indexed_clause.size() - 1; i++) + u32 min_index = UINT32_MAX; + u32 max_index = 0; + + for (const auto &range : draw_indexed_clause) { - const std::tuple &range = draw_indexed_clause[i]; - const std::tuple &next_range = draw_indexed_clause[i + 1]; - verify(HERE), (std::get<0>(range) + std::get<1>(range) == std::get<0>(next_range)); + const u32 root_index = (range.command_data_offset / type_size) + range.first; + min_index = std::min(root_index, min_index); + max_index = std::max(root_index + range.count, max_index); } - u32 first = std::get<0>(draw_indexed_clause.front()); - u32 count = std::get<0>(draw_indexed_clause.back()) + std::get<1>(draw_indexed_clause.back()) - first; + + const u32 first = min_index; + const u32 count = max_index - min_index; const gsl::byte* ptr = static_cast(vm::base(address)); return{ ptr + first * type_size, count * type_size }; } - gsl::span thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const std::vector>& vertex_ranges) const + gsl::span thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const std::vector& vertex_ranges) const { u32 offset = vertex_array_info.offset(); u32 address = rsx::get_address(rsx::get_vertex_offset_from_base(base_offset, offset & 0x7fffffff), offset >> 31); u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array_info.type(), vertex_array_info.size()); - // Disjoint first_counts ranges not supported atm - for (int i = 0; i < vertex_ranges.size() - 1; i++) + u32 min_index = UINT32_MAX; + u32 max_index = 0; + + for (const auto &range : vertex_ranges) { - const std::tuple& range = vertex_ranges[i]; - const std::tuple& next_range = vertex_ranges[i + 1]; - verify(HERE), 
(std::get<0>(range) + std::get<1>(range) == std::get<0>(next_range)); + const auto root_index = (range.command_data_offset / vertex_array_info.stride()) + range.first; + min_index = std::min(root_index, min_index); + max_index = std::max(root_index + range.count, max_index); } - u32 first = std::get<0>(vertex_ranges.front()); - u32 count = std::get<0>(vertex_ranges.back()) + std::get<1>(vertex_ranges.back()) - first; + + const u32 first = min_index; + const u32 count = max_index - min_index; const gsl::byte* ptr = gsl::narrow_cast(vm::base(address)); return {ptr + first * vertex_array_info.stride(), count * vertex_array_info.stride() + element_size}; } std::vector> - thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges, const u64 consumed_attrib_mask) const + thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector& vertex_ranges, const u64 consumed_attrib_mask) const { std::vector> result; result.reserve(rsx::limits::vertex_count); @@ -1324,21 +845,22 @@ namespace rsx std::variant thread::get_draw_command(const rsx::rsx_state& state) const { - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) { - return draw_array_command{ - rsx::method_registers.current_draw_clause.first_count_commands}; + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) + { + return draw_array_command{}; } - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) { - return draw_indexed_array_command{ - rsx::method_registers.current_draw_clause.first_count_commands, - get_raw_index_array( - rsx::method_registers.current_draw_clause.first_count_commands)}; + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) + { + return draw_indexed_array_command + { + get_raw_index_array( rsx::method_registers.current_draw_clause.draw_command_ranges) + }; } - if (rsx::method_registers.current_draw_clause.command == 
rsx::draw_command::inlined_array) { - return draw_inlined_array{ - rsx::method_registers.current_draw_clause.inline_vertex_array}; + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) + { + return draw_inlined_array{}; } fmt::throw_exception("ill-formed draw command" HERE); @@ -2799,6 +2321,7 @@ namespace rsx void thread::unpause() { + // TODO: Clean this shit up external_interrupt_lock.store(false); } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index af6753cceb..79b0b0b20b 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -8,6 +8,7 @@ #include #include "GCM.h" #include "rsx_cache.h" +#include "RSXFIFO.h" #include "RSXTexture.h" #include "RSXVertexProgram.h" #include "RSXFragmentProgram.h" @@ -155,25 +156,18 @@ namespace rsx struct draw_array_command { - /** - * First and count of index subranges. - */ - std::vector> indexes_range; + u32 __dummy; }; struct draw_indexed_array_command { - /** - * First and count of subranges to fetch in index buffer. 
- */ - std::vector> ranges_to_fetch_in_index_buffer; - gsl::span raw_index_buffer; }; struct draw_inlined_array { - std::vector inline_vertex_array; + u32 __dummy; + u32 __dummy2; }; struct interleaved_range_info @@ -379,6 +373,10 @@ namespace rsx bool supports_multidraw = false; bool supports_native_ui = false; + // FIFO + friend class FIFO::FIFO_control; + std::unique_ptr fifo_ctrl; + // Occlusion query bool zcull_surface_active = false; std::unique_ptr zcull_ctrl; @@ -396,7 +394,6 @@ namespace rsx public: RsxDmaControl* ctrl = nullptr; - atomic_t internal_get{ 0 }; atomic_t restore_point{ 0 }; atomic_t external_interrupt_lock{ false }; atomic_t external_interrupt_ack{ false }; @@ -531,6 +528,8 @@ namespace rsx virtual void on_decompiler_exit() {} virtual bool on_decompiler_task() { return false; } + void run_FIFO(); + public: virtual void begin(); virtual void end(); @@ -555,11 +554,11 @@ namespace rsx void read_barrier(u32 memory_address, u32 memory_range); virtual void sync_hint(FIFO_hint hint) {} - gsl::span get_raw_index_array(const std::vector >& draw_indexed_clause) const; - gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector>& vertex_ranges) const; + gsl::span get_raw_index_array(const std::vector& draw_indexed_clause) const; + gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector& vertex_ranges) const; std::vector> - get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges, const u64 consumed_attrib_mask) const; + get_vertex_buffers(const rsx::rsx_state& state, const std::vector& vertex_ranges, const u64 consumed_attrib_mask) const; std::variant get_draw_command(const rsx::rsx_state& state) const; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8757e2437a..d8c5b8ed01 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1487,7 +1487,7 @@ void VKGSRender::end() 
const bool allow_multidraw = supports_multidraw && !g_cfg.video.disable_FIFO_reordering; const bool single_draw = (!allow_multidraw || - rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || + rsx::method_registers.current_draw_clause.draw_command_ranges.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive); if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) @@ -1508,10 +1508,10 @@ void VKGSRender::end() } else { - const auto base_vertex = rsx::method_registers.current_draw_clause.first_count_commands.front().first; - for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + const auto base_vertex = rsx::method_registers.current_draw_clause.draw_command_ranges.front().first; + for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges) { - vkCmdDraw(*m_current_command_buffer, range.second, 1, range.first - base_vertex, 0); + vkCmdDraw(*m_current_command_buffer, range.count, 1, range.first - base_vertex, 0); } } } @@ -1531,9 +1531,9 @@ void VKGSRender::end() else { u32 first_vertex = 0; - for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges) { - const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second); + const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count); vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, first_vertex, 0, 0); first_vertex += verts; } diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 5a7da386ff..365894ca76 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -104,7 +104,7 @@ namespace rsx::method_registers.current_draw_clause.primitive, primitives_emulated); const u32 vertex_count = 
rsx::method_registers.current_draw_clause.get_elements_count(); - const u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; + const u32 min_index = rsx::method_registers.current_draw_clause.min_index(); if (primitives_emulated) { @@ -165,7 +165,7 @@ namespace command.raw_index_buffer, index_type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.restart_index_enabled(), - rsx::method_registers.restart_index(), command.ranges_to_fetch_in_index_buffer, + rsx::method_registers.restart_index(), rsx::method_registers.current_draw_clause.draw_command_ranges, rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !vk::is_primitive_native(prim); }); if (min_index >= max_index) @@ -204,8 +204,9 @@ namespace bool primitives_emulated = false; auto &draw_clause = rsx::method_registers.current_draw_clause; VkPrimitiveTopology prims = vk::get_appropriate_topology(draw_clause.primitive, primitives_emulated); - - const u32 vertex_count = ((u32)command.inline_vertex_array.size() * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride; + + const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size(); + const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride; if (!primitives_emulated) { diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index d948d72628..1b545559d1 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -1057,6 +1057,10 @@ enum Method RSX_METHOD_NON_METHOD_CMD_MASK = 0xa0030003, RSX_METHOD_RETURN_CMD = 0x00020000, + RSX_METHOD_RETURN_MASK = 0xffff0003, + + RSX_METHOD_NOP_CMD = 0x00000000, + RSX_METHOD_NOP_MASK = 0x1ffc0000, }; //Fog diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index db9fa82280..f2c9d757e7 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -340,8 +340,7 @@ namespace 
rsx rsx::method_registers.current_draw_clause.command = rsx::draw_command::array; rsx::registers_decoder::decoded_type v(arg); - rsx::method_registers.current_draw_clause.first_count_commands.emplace_back( - std::make_pair(v.start(), v.count())); + rsx::method_registers.current_draw_clause.append(v.start(), v.count()); } void draw_index_array(thread* rsx, u32 _reg, u32 arg) @@ -349,8 +348,7 @@ namespace rsx rsx::method_registers.current_draw_clause.command = rsx::draw_command::indexed; rsx::registers_decoder::decoded_type v(arg); - rsx::method_registers.current_draw_clause.first_count_commands.emplace_back( - std::make_pair(v.start(), v.count())); + rsx::method_registers.current_draw_clause.append(v.start(), v.count()); } void draw_inline_array(thread* rsx, u32 _reg, u32 arg) @@ -425,7 +423,7 @@ namespace rsx { if (arg) { - rsx::method_registers.current_draw_clause.first_count_commands.resize(0); + rsx::method_registers.current_draw_clause.draw_command_ranges.clear(); rsx::method_registers.current_draw_clause.command = draw_command::none; rsx::method_registers.current_draw_clause.primitive = to_primitive_type(arg); rsxthr->begin(); @@ -444,19 +442,19 @@ namespace rsx if (push_buffer_index_count) { rsx::method_registers.current_draw_clause.command = rsx::draw_command::indexed; - rsx::method_registers.current_draw_clause.first_count_commands.push_back(std::make_pair(0, push_buffer_index_count)); + rsx::method_registers.current_draw_clause.append(0, push_buffer_index_count); } else if (push_buffer_vertices_count) { rsx::method_registers.current_draw_clause.command = rsx::draw_command::array; - rsx::method_registers.current_draw_clause.first_count_commands.push_back(std::make_pair(0, push_buffer_vertices_count)); + rsx::method_registers.current_draw_clause.append(0, push_buffer_vertices_count); } } else rsx::method_registers.current_draw_clause.is_immediate_draw = false; - if (!(rsx::method_registers.current_draw_clause.first_count_commands.empty() && - 
rsx::method_registers.current_draw_clause.inline_vertex_array.empty())) + if (!(rsx::method_registers.current_draw_clause.draw_command_ranges.empty() && + rsx::method_registers.current_draw_clause.inline_vertex_array.empty())) { rsxthr->end(); } diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 3af9a79100..19f6c473f4 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include @@ -23,6 +23,13 @@ namespace rsx indexed, }; + struct draw_range_t + { + u32 command_data_offset = 0; + u32 first = 0; + u32 count = 0; + }; + struct draw_clause { primitive_type primitive; @@ -36,20 +43,42 @@ namespace rsx /** * Stores the first and count argument from draw/draw indexed parameters between begin/end clauses. */ - std::vector > first_count_commands; - - /** - * Optionally split first-count pairs for disjoint range rendering. Valid when emulating primitive restart - */ - std::vector > alternate_first_count_commands; + std::vector draw_command_ranges; /** * Returns how many vertex or index will be consumed by the draw clause. 
*/ u32 get_elements_count() const { - return std::accumulate(first_count_commands.begin(), first_count_commands.end(), 0, - [](u32 acc, auto b) { return acc + b.second; }); + u32 count = 0; + for (const auto &draw : draw_command_ranges) + { + count += draw.count; + } + + return count; + } + + /** + * Optimize draw command stream for rendering + */ + void compile() + { + + } + + /** + * Insert one command range + */ + void append(u32 first, u32 count) + { + + } + + u32 min_index() + { + LOG_FATAL(RSX, "Unimplemented"); + return 0; } }; diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 416be20263..b1e8a8bd03 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -296,6 +296,7 @@ + @@ -534,6 +535,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 79039e3a37..5d0bd65ffd 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -749,6 +749,9 @@ Emu\GPU\RSX\Overlays + + Emu\GPU\RSX + @@ -1440,7 +1443,7 @@ Utilities - + Utilities @@ -1449,6 +1452,9 @@ Emu\GPU\RSX\Common + + + Emu\GPU\RSX \ No newline at end of file