diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 8d9aecfaee..ce7d0130a2 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -155,9 +155,38 @@ namespace rsx rcount = ::size32(fifo_span); } - copy_data_swap_u32(®S(ctx)->transform_program[load_pos * 4 + index % 4], fifo_span.data(), rcount); + const auto out_ptr = ®S(ctx)->transform_program[load_pos * 4 + index % 4]; - RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty; + pipeline_state to_set_dirty = rsx::pipeline_state::vertex_program_ucode_dirty; + + if (rcount >= 4 && !RSX(ctx)->m_graphics_state.test(rsx::pipeline_state::vertex_program_ucode_dirty)) + { + // Assume clean + to_set_dirty = {}; + + const usz first_index_off = 0; + const usz second_index_off = (((rcount / 4) - 1) / 2) * 4; + + const u64 src_op1_2 = read_from_ptr>(fifo_span.data() + first_index_off); + const u64 src_op2_2 = read_from_ptr>(fifo_span.data() + second_index_off); + + // Fast comparison + if (src_op1_2 != read_from_ptr(out_ptr + first_index_off) || src_op2_2 != read_from_ptr(out_ptr + second_index_off)) + { + to_set_dirty = rsx::pipeline_state::vertex_program_ucode_dirty; + } + } + + if (to_set_dirty) + { + copy_data_swap_u32(out_ptr, fifo_span.data(), rcount); + } + else if (copy_data_swap_u32_cmp(out_ptr, fifo_span.data(), rcount)) + { + to_set_dirty = rsx::pipeline_state::vertex_program_ucode_dirty; + } + + RSX(ctx)->m_graphics_state |= to_set_dirty; REGS(ctx)->transform_program_load_set(load_pos + ((rcount + index % 4) / 4)); RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1); } diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.h b/rpcs3/Emu/RSX/Program/ProgramStateCache.h index 17857b80ad..c4bacedf30 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.h @@ -172,16 +172,35 @@ protected: bool recompile = false; vertex_program_type* new_shader; { + thread_local const std::pair* prev_vp = nullptr; + thread_local usz prev_count = umax; + static atomic_t invl_count = 0; + reader_lock lock(m_vertex_mutex); + if (prev_count == invl_count) + { + // prev_vp must be non-null here + if (prev_vp->first.data.size() == rsx_vp.data.size() && prev_vp->first.output_mask == rsx_vp.output_mask) + { + if (program_hash_util::vertex_program_compare()(prev_vp->first, rsx_vp)) + { + return std::forward_as_tuple(prev_vp->second, true); + } + } + } + const auto& I = m_vertex_shader_cache.find(rsx_vp); if (I != m_vertex_shader_cache.end()) { + prev_vp = &*I; + prev_count = invl_count; return std::forward_as_tuple(I->second, true); } if (!force_load) { + prev_count = umax; return std::forward_as_tuple(__null_vertex_program, false); } @@ -191,6 +210,8 @@ protected: auto [it, inserted] = m_vertex_shader_cache.try_emplace(rsx_vp); new_shader = &(it->second); recompile = inserted; + prev_count = umax; + invl_count++; } if (recompile) @@ -209,16 +230,35 @@ protected: fragment_program_type* new_shader; { + thread_local const std::pair* prev_fp = nullptr; + thread_local usz prev_count = umax; + static atomic_t invl_count = 0; + reader_lock lock(m_fragment_mutex); + if (prev_count == invl_count) + { + // prev_vp must be non-null here + if (prev_fp->first.ucode_length == rsx_fp.ucode_length && prev_fp->first.texcoord_control_mask == rsx_fp.texcoord_control_mask) + { + if (program_hash_util::fragment_program_compare()(prev_fp->first, rsx_fp)) + { + return std::forward_as_tuple(prev_fp->second, true); + } + } + } + const auto& I = m_fragment_shader_cache.find(rsx_fp); if (I != m_fragment_shader_cache.end()) { + prev_fp = &*I; + prev_count = invl_count; return std::forward_as_tuple(I->second, true); } if (!force_load) { + prev_count = umax; return std::forward_as_tuple(__null_fragment_program, false); } @@ -227,6 +267,8 @@ protected: lock.upgrade(); std::tie(it, recompile) = m_fragment_shader_cache.try_emplace(rsx_fp); new_shader = &(it->second); + prev_count = umax; + invl_count++; } if (recompile)