From 846daadd5dc70e069716c48997a88f9152a4552e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 1 Nov 2018 00:25:59 +0300 Subject: [PATCH] rsx: Fixups - Improve vertex attribute layout format. Allows for full 16-bit attribute divisor - Use actual pitch when declaring framebuffer rsx pitch instead of register value in case of swizzle? rendering --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 46 +++++++++-------- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 11 ++-- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 4 +- rpcs3/Emu/RSX/RSXThread.cpp | 77 +++++++++++++--------------- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 19 ++++--- 5 files changed, 77 insertions(+), 80 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index e4840f9491..c334fc3fbe 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -168,7 +168,7 @@ namespace glsl " int swap_bytes;\n" " int is_volatile;\n" " int frequency;\n" - " int divisor;\n" + " int modulo;\n" "};\n\n" "uint get_bits(uvec4 v, int swap)\n" @@ -291,22 +291,24 @@ namespace glsl "{\n" " // Each descriptor is 64 bits wide\n" " // [0-8] attribute stride\n" - " // [8-20] attribute divisor\n" - " // [20-21] swap bytes flag\n" - " // [21-22] volatile flag\n" - " // [22-24] frequency op\n" + " // [8-24] attribute divisor\n" " // [24-27] attribute type\n" " // [27-30] attribute size\n" + " // [32-60] starting offset\n" + " // [60-61] swap bytes flag\n" + " // [61-62] volatile flag\n" + " // [62-63] modulo enable flag\n" " attribute_desc result;\n" - " int attribute_flags = input_attributes[location].x;\n" - " result.stride = attribute_flags & 0xFF;\n" - " result.divisor = (attribute_flags >> 8) & 0xFFF;\n" - " result.swap_bytes = (attribute_flags >> 20) & 0x1;\n" - " result.is_volatile = (attribute_flags >> 21) & 0x1;\n" - " result.frequency = (attribute_flags >> 22) & 0x3;\n" - " result.type = (attribute_flags >> 24) & 0x7;\n" - " result.attribute_size = (attribute_flags >> 27) & 0x7;\n" - " 
result.starting_offset = input_attributes[location].y;\n" + " int attrib0 = input_attributes[location].x;\n" + " int attrib1 = input_attributes[location].y;\n" + " result.stride = attrib0 & 0xFF;\n" + " result.frequency = (attrib0 >> 8) & 0xFFFF;\n" + " result.type = (attrib0 >> 24) & 0x7;\n" + " result.attribute_size = (attrib0 >> 27) & 0x7;\n" + " result.starting_offset = (attrib1 & 0x1FFFFFFF);\n" + " result.swap_bytes = (attrib1 >> 29) & 0x1;\n" + " result.is_volatile = (attrib1 >> 30) & 0x1;\n" + " result.modulo = (attrib1 >> 31) & 0x3;\n" " return result;\n" "}\n\n" @@ -334,15 +336,17 @@ namespace glsl " {\n" " vertex_id = 0;\n" " }\n" - " else if (desc.frequency == 2)\n" + " else if (desc.frequency > 1)\n" " {\n" " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n" - " vertex_id = " << vertex_id_name << " / desc.divisor;\n" - " }\n" - " else if (desc.frequency == 3)\n" - " {\n" - " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n" - " vertex_id = " << vertex_id_name << " % desc.divisor;\n" + " if (desc.modulo != 0)\n" + " {\n" + " vertex_id = " << vertex_id_name << " % desc.frequency;\n" + " }\n" + " else\n" + " {\n" + " vertex_id = " << vertex_id_name << " / desc.frequency; \n" + " }\n" " }\n" "\n" " if (desc.is_volatile != 0)\n" diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 8d9f08756a..59df76cbe4 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1301,17 +1301,16 @@ void GLGSRender::load_program_env() if (update_vertex_env) { // Vertex state - auto mapping = m_vertex_env_buffer->alloc_from_heap(160, m_uniform_buffer_offset_align); + auto mapping = m_vertex_env_buffer->alloc_from_heap(144, m_uniform_buffer_offset_align); auto buf = static_cast(mapping.first); fill_scale_offset_data(buf, false); fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = 
0; // Reserved - *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); - *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); - *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); + *(reinterpret_cast(buf + 132)) = rsx::method_registers.point_size(); + *(reinterpret_cast(buf + 136)) = rsx::method_registers.clip_min(); + *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_max(); - m_vertex_env_buffer->bind_range(0, mapping.second, 160); + m_vertex_env_buffer->bind_range(0, mapping.second, 144); } if (update_transform_constants) diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 7ac39d196d..5d221bc595 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -247,7 +247,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk auto rtt = std::get<1>(m_rtts.m_bound_render_targets[i]); color_targets[i] = rtt->id(); - rtt->set_rsx_pitch(layout.color_pitch[i]); + rtt->set_rsx_pitch(layout.actual_color_pitch[i]); m_surface_info[i] = { layout.color_addresses[i], layout.actual_color_pitch[i], false, layout.color_format, layout.depth_format, layout.width, layout.height }; rtt->tile = find_tile(color_offsets[i], color_locations[i]); @@ -277,7 +277,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); depth_stencil_target = ds->id(); - std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch()); + std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(layout.actual_zeta_pitch); m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height }; ds->write_aa_mode = layout.aa_mode; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index e4d8ace74c..a09d8a3958 100644 --- 
a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2024,21 +2024,20 @@ namespace rsx // Fill the data // Each descriptor field is 64 bits wide - // [0-8] attribute stride\n" - // [8-20] attribute divisor\n" - // [20-21] swap bytes flag\n" - // [21-22] volatile flag\n" - // [22-24] frequency op\n" - // [24-27] attribute type\n" - // [27-30] attribute size\n" + // [0-8] attribute stride + // [8-24] attribute divisor + // [24-27] attribute type + // [27-30] attribute size + // [30-32] reserved + // [32-61] starting offset + // [61-62] swap bytes flag + // [62-63] volatile flag + // [63-64] modulo enable flag - memset(buffer, 0, 256); - - const s32 swap_storage_mask = (1 << 20); - const s32 volatile_storage_mask = (1 << 21); - const s32 default_frequency_mask = (1 << 22); - const s32 division_op_frequency_mask = (2 << 22); - const s32 modulo_op_frequency_mask = (3 << 22); + const s32 default_frequency_mask = (1 << 8); + const s32 swap_storage_mask = (1 << 29); + const s32 volatile_storage_mask = (1 << 30); + const s32 modulo_op_frequency_mask = (1 << 31); const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); @@ -2049,30 +2048,28 @@ namespace rsx rsx::vertex_base_type type = {}; s32 size = 0; - s32 attributes = 0; + s32 attrib0 = 0; + s32 attrib1 = 0; if (layout.attribute_placement[index] == attribute_buffer_placement::transient) { if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) { - auto &info = rsx::method_registers.vertex_arrays_info[index]; - type = info.type(); - size = info.size(); - - if (!size) + const auto &info = rsx::method_registers.vertex_arrays_info[index]; + + if (!info.size()) { // Register const auto& reginfo = rsx::method_registers.register_vertex_info[index]; type = reginfo.type; size = reginfo.size; - attributes = rsx::get_vertex_type_size_on_host(type, size); - attributes |= volatile_storage_mask; + attrib0 = rsx::get_vertex_type_size_on_host(type, size); } else { 
- attributes = layout.interleaved_blocks[0].attribute_stride; - attributes |= default_frequency_mask | volatile_storage_mask; + // Array + attrib0 = layout.interleaved_blocks[0].attribute_stride | default_frequency_mask; } } else @@ -2083,12 +2080,12 @@ namespace rsx if (rsx::method_registers.current_draw_clause.is_immediate_draw && vertex_push_buffers[index].vertex_count > 1) { + // Push buffer const auto &info = rsx::method_registers.register_vertex_info[index]; type = info.type; size = info.size; - attributes = rsx::get_vertex_type_size_on_host(type, size); - attributes |= default_frequency_mask | volatile_storage_mask; + attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask; } else { @@ -2097,10 +2094,11 @@ namespace rsx type = info.type; size = info.size; - attributes = rsx::get_vertex_type_size_on_host(type, size); - attributes |= volatile_storage_mask; + attrib0 = rsx::get_vertex_type_size_on_host(type, size); } } + + attrib1 |= volatile_storage_mask; } else { @@ -2109,7 +2107,7 @@ namespace rsx size = info.size(); auto stride = info.stride(); - attributes |= stride; + attrib0 = stride; if (stride > 0) //when stride is 0, input is not an array but a single element { @@ -2119,19 +2117,15 @@ namespace rsx case 0: case 1: { - attributes |= default_frequency_mask; + attrib0 |= default_frequency_mask; break; } default: { - verify(HERE), frequency <= 4095u; - if (modulo_mask & (1 << index)) - attributes |= modulo_op_frequency_mask; - else - attributes |= division_op_frequency_mask; + attrib1 |= modulo_op_frequency_mask; - attributes |= (frequency << 8); + attrib0 |= (frequency << 8); break; } } @@ -2155,13 +2149,14 @@ namespace rsx break; } - if (to_swap_bytes) attributes |= swap_storage_mask; + if (to_swap_bytes) attrib1 |= swap_storage_mask; - attributes |= (static_cast(type) << 24); - attributes |= (size << 27); + attrib0 |= (static_cast(type) << 24); + attrib0 |= (size << 27); + attrib1 |= offset_in_block[index]; - buffer[index * 4 + 
0] = attributes; - buffer[index * 4 + 1] = offset_in_block[index]; + buffer[index * 4 + 0] = attrib0; + buffer[index * 4 + 1] = attrib1; } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index e4f231c954..15f0a4fa85 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2598,18 +2598,17 @@ void VKGSRender::load_program_env() { // Vertex state const auto mem = m_vertex_env_ring_info.alloc<256>(256); - auto buf = (u8*)m_vertex_env_ring_info.map(mem, 160); + auto buf = (u8*)m_vertex_env_ring_info.map(mem, 144); fill_scale_offset_data(buf, false); fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = 0; // Reserved - *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); - *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); - *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); + *(reinterpret_cast(buf + 132)) = rsx::method_registers.point_size(); + *(reinterpret_cast(buf + 136)) = rsx::method_registers.clip_min(); + *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_max(); m_vertex_env_ring_info.unmap(); - m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 160 }; + m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 144 }; } if (update_transform_constants) @@ -2883,8 +2882,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) bound_images.push_back(surface); m_surface_info[index].address = layout.color_addresses[index]; - m_surface_info[index].pitch = layout.color_pitch[index]; - surface->rsx_pitch = layout.color_pitch[index]; + m_surface_info[index].pitch = layout.actual_color_pitch[index]; + surface->rsx_pitch = layout.actual_color_pitch[index]; surface->write_aa_mode = layout.aa_mode; m_texture_cache.notify_surface_changed(layout.color_addresses[index]); @@ -2899,8 +2898,8 @@ void 
VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) bound_images.push_back(ds); m_depth_surface_info.address = layout.zeta_address; - m_depth_surface_info.pitch = layout.zeta_pitch; - ds->rsx_pitch = layout.zeta_pitch; + m_depth_surface_info.pitch = layout.actual_zeta_pitch; + ds->rsx_pitch = layout.actual_zeta_pitch; ds->write_aa_mode = layout.aa_mode; m_texture_cache.notify_surface_changed(layout.zeta_address);