diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 5295ac1098..5f6ac1f514 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -522,6 +522,12 @@ namespace rsx */ void invalidate_surface_address(u32 addr, bool depth) { + if (address_is_bound(addr, depth)) + { + LOG_ERROR(RSX, "Cannot invalidate a currently bound render target!"); + return; + } + if (!depth) { auto It = m_render_targets_storage.find(addr); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 90bbdde32d..c18360beab 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -25,6 +25,14 @@ namespace rsx framebuffer_storage = 3 }; + //Sampled image descriptor + struct sampled_image_descriptor_base + { + texture_upload_context upload_context = texture_upload_context::shader_read; + bool is_depth_texture = false; + f32 internal_scale = 1.f; + }; + struct cached_texture_section : public rsx::buffered_section { u16 width; @@ -204,6 +212,22 @@ namespace rsx u32 address_range = 0; }; + struct sampled_image_descriptor : public sampled_image_descriptor_base + { + image_view_type image_handle = 0; + + sampled_image_descriptor() + {} + + sampled_image_descriptor(image_view_type handle, const texture_upload_context ctx, const bool is_depth, const f32 scale) + { + image_handle = handle; + upload_context = ctx; + is_depth_texture = is_depth; + internal_scale = scale; + } + }; + private: //Internal implementation methods and helpers @@ -567,6 +591,15 @@ namespace rsx region.set_dirty(false); no_access_range = region.get_min_max(no_access_range); } + else + { + if (region.get_context() != texture_upload_context::framebuffer_storage) + { + //This space was being used for other purposes other than framebuffer storage + //Delete used resources before attaching it to framebuffer memory + free_texture_section(region); + } + } region.protect(utils::protection::no); region.create(width, height, 1, 1, nullptr, image, pitch, false, std::forward(extras)...); @@ -831,7 +864,7 @@ namespace rsx } template - image_view_type upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras) + sampled_image_descriptor upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras) { const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); const u32 tex_size = (u32)get_texture_size(tex); @@ -841,7 +874,7 @@ namespace rsx if (!texaddr || !tex_size) { LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, tex_size); - return 0; + return {}; } const auto extended_dimension = tex.get_extended_texture_dimension(); @@ -857,6 +890,7 @@ namespace rsx if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d) LOG_ERROR(RSX, "Texture resides in render target memory, but requested type is not 2D (%d)", (u32)extended_dimension); + f32 internal_scale = (f32)texptr->get_native_pitch() / tex.pitch(); for (const auto& tex : m_rtts.m_bound_render_targets) { if (std::get<0>(tex) == texaddr) @@ -864,7 +898,7 @@ namespace rsx if (g_cfg.video.strict_rendering_mode) { LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); - return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()); + return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()), texture_upload_context::framebuffer_storage, false, internal_scale }; } else { @@ -875,8 +909,10 @@ namespace rsx } } - return texptr->get_view(); + return{ texptr->get_view(), texture_upload_context::framebuffer_storage, false, internal_scale }; } + else + m_rtts.invalidate_surface_address(texaddr, false); } if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) @@ -886,12 +922,13 @@ namespace rsx if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d) LOG_ERROR(RSX, "Texture resides in depth buffer memory, but requested type is not 2D (%d)", (u32)extended_dimension); + f32 internal_scale = (f32)texptr->get_native_pitch() / tex.pitch(); if (texaddr == std::get<0>(m_rtts.m_bound_depth_stencil)) { if (g_cfg.video.strict_rendering_mode) { LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr); - return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()); + return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()), texture_upload_context::framebuffer_storage, true, internal_scale }; } else { @@ -900,8 +937,10 @@ namespace rsx } } - return texptr->get_view(); + return{ texptr->get_view(), texture_upload_context::framebuffer_storage, true, internal_scale }; } + else + m_rtts.invalidate_surface_address(texaddr, true); } } @@ -945,10 +984,14 @@ namespace rsx const u32 internal_width = (const u32)(tex_width * internal_scale); const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true); - if (rsc.surface/* && test_framebuffer(texaddr)*/) + if (rsc.surface) { //TODO: Check that this region is not cpu-dirty before doing a copy - if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d) + if (!test_framebuffer(texaddr)) + { + m_rtts.invalidate_surface_address(texaddr, rsc.is_depth_surface); + } + else if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d) { LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d", texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height()); @@ -965,16 +1008,16 @@ namespace rsx insert_texture_barrier(); } - return rsc.surface->get_view(); + return{ rsc.surface->get_view(), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f }; } - else return create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false), - rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)); + else return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false), + rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f }; } else { LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); - return create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false), - rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)); + return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false), + rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, 1.f }; } } } @@ -987,7 +1030,7 @@ namespace rsx auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height, depth); if (cached_texture) { - return cached_texture->get_raw_view(); + return{ cached_texture->get_raw_view(), cached_texture->get_context(), cached_texture->is_depth_texture(), 1.f }; } if ((!blit_engine_incompatibility_warning_raised && g_cfg.video.use_gpu_texture_scaling) || is_hw_blit_engine_compatible(format)) @@ -1030,7 +1073,7 @@ namespace rsx auto src_image = surface->get_raw_texture(); if (auto result = create_temporary_subresource_view(cmd, &src_image, format, offset_x, offset_y, tex_width, tex_height)) - return result; + return{ result, texture_upload_context::blit_engine_dst, surface->is_depth_texture(), 1.f }; } } } @@ -1048,8 +1091,9 @@ namespace rsx invalidate_range_impl_base(texaddr, tex_size, false, false, false, true, std::forward(extras)...); m_texture_memory_in_use += (tex_pitch * tex_height); - return upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, - texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view(); + return{ upload_image_from_cpu(cmd, texaddr, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, + texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view(), + texture_upload_context::shader_read, false, 1.f }; } template diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index c3f04f0347..fb90d1c609 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -54,7 +54,7 @@ void D3D12GSRender::load_program() }; get_current_vertex_program(); - get_current_fragment_program(rtt_lookup_func); + get_current_fragment_program_legacy(rtt_lookup_func); if (!current_fragment_program.valid) return; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index c599a16d4d..ba6aa5fe8c 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -347,6 +347,72 @@ void GLGSRender::end() std::optional > indexed_draw_info; std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer(); + //Load textures + { + std::chrono::time_point textures_start = steady_clock::now(); + + std::lock_guard lock(m_sampler_mutex); + void* unused = nullptr; + + for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + { + if (m_samplers_dirty || m_textures_dirty[i]) + { + if (!fs_sampler_state[i]) + fs_sampler_state[i] = std::make_unique(); + + auto sampler_state = static_cast(fs_sampler_state[i].get()); + + if (rsx::method_registers.fragment_textures[i].enabled()) + { + glActiveTexture(GL_TEXTURE0 + i); + + *sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.fragment_textures[i], m_rtts); + m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]); + + GLenum target = get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]); + glBindTexture(target, sampler_state->image_handle); + } + else + { + *sampler_state = {}; + } + + m_textures_dirty[i] = false; + } + } + + for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + { + int texture_index = i + rsx::limits::fragment_textures_count; + + if (m_samplers_dirty || m_vertex_textures_dirty[i]) + { + if (!vs_sampler_state[i]) + vs_sampler_state[i] = std::make_unique(); + + auto sampler_state = static_cast(vs_sampler_state[i].get()); + + if (rsx::method_registers.vertex_textures[i].enabled()) + { + glActiveTexture(GL_TEXTURE0 + texture_index); + + *sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.vertex_textures[i], m_rtts); + glBindTexture(GL_TEXTURE_2D, static_cast(vs_sampler_state[i].get())->image_handle); + } + else + *sampler_state = {}; + + m_vertex_textures_dirty[i] = false; + } + } + + m_samplers_dirty.store(false); + + std::chrono::time_point textures_end = steady_clock::now(); + m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); + } + std::chrono::time_point program_start = steady_clock::now(); //Load program here since it is dependent on vertex state @@ -462,43 +528,6 @@ void GLGSRender::end() glEnable(GL_SCISSOR_TEST); - std::chrono::time_point textures_start = steady_clock::now(); - - //Setup textures - //Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) - { - int location; - if (rsx::method_registers.fragment_textures[i].enabled() && m_program->uniforms.has_location("tex" + std::to_string(i), &location)) - { - m_gl_texture_cache.upload_and_bind_texture(i, get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]), rsx::method_registers.fragment_textures[i], m_rtts); - - if (m_textures_dirty[i]) - { - m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]); - m_textures_dirty[i] = false; - } - } - } - - //Vertex textures - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) - { - int texture_index = i + rsx::limits::fragment_textures_count; - int location; - - if (!rsx::method_registers.vertex_textures[i].enabled()) - continue; - - if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location)) - { - m_gl_texture_cache.upload_and_bind_texture(texture_index, GL_TEXTURE_2D, rsx::method_registers.vertex_textures[i], m_rtts); - } - } - - std::chrono::time_point textures_end = steady_clock::now(); - m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); - std::chrono::time_point draw_start = steady_clock::now(); if (g_cfg.video.debug_output) @@ -952,44 +981,16 @@ bool GLGSRender::do_method(u32 cmd, u32 arg) bool GLGSRender::check_program_state() { - auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple - { - gl::render_target *surface = nullptr; - if (!is_depth) - surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); - else - surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - - const bool dirty_framebuffer = (surface != nullptr && !m_gl_texture_cache.test_framebuffer(texaddr)); - if (dirty_framebuffer || !surface) - { - if (is_depth && m_gl_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex))) - return std::make_tuple(true, 0); - - if (dirty_framebuffer) - return std::make_tuple(false, 0); - - auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth); - if (!rsc.surface || rsc.is_depth_surface != is_depth) - return std::make_tuple(false, 0); - - surface = rsc.surface; - } - - return std::make_tuple(true, surface->get_native_pitch()); - }; - - get_current_fragment_program(rtt_lookup_func); - - if (current_fragment_program.valid == false) - return false; - - get_current_vertex_program(); - return true; + return (rsx::method_registers.shader_program_address() != 0); } void GLGSRender::load_program(u32 vertex_base, u32 vertex_count) { + get_current_fragment_program(fs_sampler_state); + verify(HERE), current_fragment_program.valid; + + get_current_vertex_program(); + auto &fragment_program = current_fragment_program; auto &vertex_program = current_vertex_program; @@ -1061,7 +1062,7 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count) } m_transform_constants_dirty = false; -} +} void GLGSRender::flip(int buffer) { @@ -1228,6 +1229,11 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) if (!result.violation_handled) return false; + { + std::lock_guard lock(m_sampler_mutex); + m_samplers_dirty.store(true); + } + if (result.num_flushable > 0) { work_item &task = post_flush_request(address, result); @@ -1249,7 +1255,13 @@ void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { //Discard all memory in that range without bothering with writeback (Force it for strict?) if (m_gl_texture_cache.invalidate_range(address_base, size, true, true, false).violation_handled) + { m_gl_texture_cache.purge_dirty(); + { + std::lock_guard lock(m_sampler_mutex); + m_samplers_dirty.store(true); + } + } } void GLGSRender::do_local_task() @@ -1296,6 +1308,7 @@ void GLGSRender::synchronize_buffers() bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) { + m_samplers_dirty.store(true); return m_gl_texture_cache.blit(src, dst, interpolate, m_rtts); } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 30b88e91b3..b7b9bebafd 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -310,33 +310,6 @@ struct driver_state } }; -struct sw_ring_buffer -{ - std::vector data; - u32 ring_pos = 0; - u32 ring_length = 0; - - sw_ring_buffer(u32 size) - { - data.resize(size); - ring_length = size; - } - - void* get(u32 dwords) - { - const u32 required = (dwords << 2); - if ((ring_pos + required) > ring_length) - { - ring_pos = 0; - return data.data(); - } - - void *result = data.data() + ring_pos; - ring_pos += required; - return result; - } -}; - class GLGSRender : public GSRender { private: @@ -405,6 +378,11 @@ private: const u32 occlusion_query_count = 128; std::array occlusion_query_data = {}; + std::mutex m_sampler_mutex; + std::atomic_bool m_samplers_dirty = {true}; + std::array, rsx::limits::fragment_textures_count> fs_sampler_state = {}; + std::array, rsx::limits::vertex_textures_count> vs_sampler_state = {}; + public: GLGSRender(); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 91b3db0278..6c3799b384 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -783,15 +783,5 @@ namespace gl void* unused = nullptr; return upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter); } - - template - void upload_and_bind_texture(int index, GLenum target, RsxTextureType &tex, gl_render_targets &m_rtts) - { - glActiveTexture(GL_TEXTURE0 + index); - void* unused = nullptr; - - auto id = upload_texture(unused, tex, m_rtts); - glBindTexture(target, id); - } }; } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index c0f8f82cac..0965e9314b 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -7,6 +7,7 @@ #include "Emu/Cell/PPUCallback.h" #include "Common/BufferUtils.h" +#include "Common/texture_cache.h" #include "rsx_methods.h" #include "rsx_utils.h" @@ -243,6 +244,7 @@ namespace rsx }; m_rtts_dirty = true; memset(m_textures_dirty, -1, sizeof(m_textures_dirty)); + memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty)); m_transform_constants_dirty = true; } @@ -1321,7 +1323,108 @@ namespace rsx return result; } - void thread::get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info) + void thread::get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors) + { + auto &result = current_fragment_program = {}; + + const u32 shader_program = rsx::method_registers.shader_program_address(); + if (shader_program == 0) + return; + + const u32 program_location = (shader_program & 0x3) - 1; + const u32 program_offset = (shader_program & ~0x3); + + result.offset = program_offset; + result.addr = vm::base(rsx::get_address(program_offset, program_location)); + result.valid = true; + result.ctrl = rsx::method_registers.shader_control(); + result.unnormalized_coords = 0; + result.front_back_color_enabled = !rsx::method_registers.two_side_light_en(); + result.back_color_diffuse_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_BACKDIFFUSE); + result.back_color_specular_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_BACKSPECULAR); + result.front_color_diffuse_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTDIFFUSE); + result.front_color_specular_output = !!(rsx::method_registers.vertex_attrib_output_mask() & CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTSPECULAR); + result.redirected_textures = 0; + result.shadow_textures = 0; + + std::array texture_dimensions; + const auto resolution_scale = rsx::get_resolution_scale(); + + for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i) + { + auto &tex = rsx::method_registers.fragment_textures[i]; + result.texture_scale[i][0] = 1.f; + result.texture_scale[i][1] = 1.f; + result.textures_alpha_kill[i] = 0; + result.textures_zfunc[i] = 0; + + if (!tex.enabled()) + { + texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d; + } + else + { + texture_dimensions[i] = tex.get_extended_texture_dimension(); + + if (tex.alpha_kill_enabled()) + { + //alphakill can be ignored unless a valid comparison function is set + const rsx::comparison_function func = (rsx::comparison_function)tex.zfunc(); + if (func < rsx::comparison_function::always && func > rsx::comparison_function::never) + { + result.textures_alpha_kill[i] = 1; + result.textures_zfunc[i] = (u8)func; + } + } + + const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); + const u32 raw_format = tex.format(); + + if (raw_format & CELL_GCM_TEXTURE_UN) + result.unnormalized_coords |= (1 << i); + + if (sampler_descriptors[i]->upload_context == rsx::texture_upload_context::framebuffer_storage) + { + if (raw_format & CELL_GCM_TEXTURE_UN) + { + result.texture_scale[i][0] = (resolution_scale * sampler_descriptors[i]->internal_scale); + result.texture_scale[i][1] = resolution_scale; + } + } + + if (sampler_descriptors[i]->is_depth_texture) + { + const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + switch (format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + result.redirected_textures |= (1 << i); + break; + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + { + const auto compare_mode = (rsx::comparison_function)tex.zfunc(); + if (result.textures_alpha_kill[i] == 0 && + compare_mode < rsx::comparison_function::always && + compare_mode > rsx::comparison_function::never) + result.shadow_textures |= (1 << i); + break; + } + default: + LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format); + } + } + } + } + + result.set_texture_dimension(texture_dimensions); + } + + void thread::get_current_fragment_program_legacy(std::function(u32, fragment_texture&, bool)> get_surface_info) { auto &result = current_fragment_program = {}; @@ -1412,8 +1515,8 @@ namespace rsx case CELL_GCM_TEXTURE_D8R8G8B8: case CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_R5G6B5: - result.redirected_textures |= (1 << i); - break; + result.redirected_textures |= (1 << i); + break; case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: @@ -1426,7 +1529,7 @@ namespace rsx break; } default: - LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format); + LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format); } } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 85cda50916..ecbda23b7b 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -135,6 +135,8 @@ namespace rsx std::array attribute_placement; }; + struct sampled_image_descriptor_base; + class thread : public named_thread { std::shared_ptr m_vblank_thread; @@ -188,6 +190,7 @@ namespace rsx bool m_rtts_dirty; bool m_transform_constants_dirty; bool m_textures_dirty[16]; + bool m_vertex_textures_dirty[4]; protected: std::array get_color_surface_addresses() const; @@ -208,7 +211,9 @@ namespace rsx * get_surface_info is a helper takes 2 parameters: rsx_texture_address and surface_is_depth * returns whether surface is a render target and surface pitch in native format */ - void get_current_fragment_program(std::function(u32, fragment_texture&, bool)> get_surface_info); + void get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors); + void get_current_fragment_program_legacy(std::function(u32, fragment_texture&, bool)> get_surface_info); + public: double fps_limit = 59.94; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 4910103089..9b237a2199 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -712,6 +712,14 @@ VKGSRender::~VKGSRender() m_rtts.destroy(); m_texture_cache.destroy(); + //Sampler handles + for (auto& handle : fs_sampler_handles) + handle.reset(); + + for (auto& handle : vs_sampler_handles) + handle.reset(); + + //Overlay text handler m_text_writer.reset(); //Pipeline descriptors @@ -750,6 +758,11 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) if (!result.violation_handled) return false; + { + std::lock_guard lock(m_sampler_mutex); + m_samplers_dirty.store(true); + } + if (result.num_flushable > 0) { const bool is_rsxthr = std::this_thread::get_id() == rsx_thread; @@ -845,6 +858,23 @@ void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()).violation_handled) { m_texture_cache.purge_dirty(); + { + std::lock_guard lock(m_sampler_mutex); + m_samplers_dirty.store(true); + } + } +} + +void VKGSRender::notify_tile_unbound(u32 tile) +{ + //TODO: Handle texture writeback + //u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location); + //on_notify_memory_unmapped(addr, tiles[tile].size); + //m_rtts.invalidate_surface_address(addr, false); + + { + std::lock_guard lock(m_sampler_mutex); + m_samplers_dirty.store(true); } } @@ -995,8 +1025,6 @@ void VKGSRender::end() return; } - std::chrono::time_point state_check_start = steady_clock::now(); - //Load program here since it is dependent on vertex state if (!check_program_status()) { @@ -1005,24 +1033,152 @@ void VKGSRender::end() return; } - std::chrono::time_point state_check_end = steady_clock::now(); - m_setup_time += (u32)std::chrono::duration_cast(state_check_end - state_check_start).count(); + //Close current pass to avoid conflict with texture functions + close_render_pass(); //Programs data is dependent on vertex state - std::chrono::time_point vertex_start = state_check_end; + std::chrono::time_point vertex_start = steady_clock::now(); auto upload_info = upload_vertex_data(); std::chrono::time_point vertex_end = steady_clock::now(); m_vertex_upload_time += std::chrono::duration_cast(vertex_end - vertex_start).count(); + std::chrono::time_point textures_start = vertex_end; + //Load textures + { + std::lock_guard lock(m_sampler_mutex); + + for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) + { + if (m_samplers_dirty || m_textures_dirty[i]) + { + if (!fs_sampler_state[i]) + fs_sampler_state[i] = std::make_unique(); + + auto sampler_state = static_cast(fs_sampler_state[i].get()); + + if (rsx::method_registers.fragment_textures[i].enabled()) + { + *sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); + + const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); + VkCompareOp depth_compare = fs_sampler_state[i]->is_depth_texture ? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER; + + bool replace = !fs_sampler_handles[i]; + VkFilter min_filter; + VkSamplerMipmapMode mip_mode; + f32 min_lod = 0.f, max_lod = 0.f; + f32 lod_bias = 0.f; + + const f32 af_level = g_cfg.video.anisotropic_level_override > 0 ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso()); + const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()); + const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()); + const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()); + const auto unnormalized_coords = !!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN); + const auto mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()); + const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()); + + std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter()); + + if (rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1) + { + min_lod = (float)(rsx::method_registers.fragment_textures[i].min_lod() >> 8); + max_lod = (float)(rsx::method_registers.fragment_textures[i].max_lod() >> 8); + lod_bias = rsx::method_registers.fragment_textures[i].bias(); + } + else + { + mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + } + + if (fs_sampler_handles[i]) + { + if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, unnormalized_coords, lod_bias, af_level, min_lod, max_lod, + min_filter, mag_filter, mip_mode, border_color, fs_sampler_state[i]->is_depth_texture, depth_compare)) + { + m_current_frame->samplers_to_clean.push_back(std::move(fs_sampler_handles[i])); + replace = true; + } + } + + if (replace) + { + fs_sampler_handles[i] = std::make_unique(*m_device, wrap_s, wrap_t, wrap_r, unnormalized_coords, lod_bias, af_level, min_lod, max_lod, + min_filter, mag_filter, mip_mode, border_color, fs_sampler_state[i]->is_depth_texture, depth_compare); + } + } + else + { + *sampler_state = {}; + } + + m_textures_dirty[i] = false; + } + } + + for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + { + int texture_index = i + rsx::limits::fragment_textures_count; + + if (m_samplers_dirty || m_vertex_textures_dirty[i]) + { + if (!vs_sampler_state[i]) + vs_sampler_state[i] = std::make_unique(); + + auto sampler_state = static_cast(vs_sampler_state[i].get()); + + if (rsx::method_registers.vertex_textures[i].enabled()) + { + *sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts); + + bool replace = !vs_sampler_handles[i]; + const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN); + const auto min_lod = (f32)rsx::method_registers.vertex_textures[i].min_lod(); + const auto max_lod = (f32)rsx::method_registers.vertex_textures[i].max_lod(); + const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color()); + + if (vs_sampler_handles[i]) + { + if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, + unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color)) + { + m_current_frame->samplers_to_clean.push_back(std::move(vs_sampler_handles[i])); + replace = true; + } + } + + if (replace) + { + vs_sampler_handles[i] = std::make_unique( + *m_device, + VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, + unnormalized_coords, + 0.f, 1.f, min_lod, max_lod, + VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color); + } + } + else + *sampler_state = {}; + + m_vertex_textures_dirty[i] = false; + } + } + + m_samplers_dirty.store(false); + } + + std::chrono::time_point textures_end = steady_clock::now(); + m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); + //Load program - std::chrono::time_point program_start = vertex_end; + std::chrono::time_point program_start = textures_end; load_program(std::get<2>(upload_info), std::get<3>(upload_info)); + + m_program->bind_uniform(m_persistent_attribute_storage, "persistent_input_stream", m_current_frame->descriptor_set); + m_program->bind_uniform(m_volatile_attribute_storage, "volatile_input_stream", m_current_frame->descriptor_set); + std::chrono::time_point program_stop = steady_clock::now(); m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); - //Close current pass to avoid conflict with texture functions - close_render_pass(); - if (g_cfg.video.strict_rendering_mode) { auto copy_rtt_contents = [&](vk::render_target* surface) @@ -1079,7 +1235,7 @@ void VKGSRender::end() } } - std::chrono::time_point textures_start = steady_clock::now(); + textures_start = steady_clock::now(); for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { @@ -1091,48 +1247,15 @@ void VKGSRender::end() continue; } - vk::image_view *texture0 = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); - - if (!texture0) + auto sampler_state = static_cast(fs_sampler_state[i].get()); + if (!sampler_state->image_handle) { LOG_ERROR(RSX, "Texture upload failed to texture index %d. Binding null sampler.", i); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set); continue; } - const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - - VkBool32 is_depth_texture = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8); - VkCompareOp depth_compare = is_depth_texture? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true): VK_COMPARE_OP_NEVER; - - VkFilter min_filter; - VkSamplerMipmapMode mip_mode; - float min_lod = 0.f, max_lod = 0.f; - float lod_bias = 0.f; - - std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter()); - - if (rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1) - { - min_lod = (float)(rsx::method_registers.fragment_textures[i].min_lod() >> 8); - max_lod = (float)(rsx::method_registers.fragment_textures[i].max_lod() >> 8); - lod_bias = rsx::method_registers.fragment_textures[i].bias(); - } - else - { - mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - } - - f32 af_level = g_cfg.video.anisotropic_level_override > 0 ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso()); - m_current_frame->samplers_to_clean.push_back(std::make_unique( - *m_device, - vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()), vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()), - !!(rsx::method_registers.fragment_textures[i].format() & CELL_GCM_TEXTURE_UN), - lod_bias, af_level, min_lod, max_lod, - min_filter, vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()), mip_mode, vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()), - is_depth_texture, depth_compare)); - - m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set); + m_program->bind_uniform({ fs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set); } } @@ -1146,28 +1269,19 @@ void VKGSRender::end() continue; } - vk::image_view *texture0 = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts); - - if (!texture0) + auto sampler_state = static_cast(vs_sampler_state[i].get()); + if (!sampler_state->image_handle) { LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set); continue; } - m_current_frame->samplers_to_clean.push_back(std::make_unique( - *m_device, - VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, - !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN), - 0.f, 1.f, (f32)rsx::method_registers.vertex_textures[i].min_lod(), (f32)rsx::method_registers.vertex_textures[i].max_lod(), - VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color()) - )); - - m_program->bind_uniform({ m_current_frame->samplers_to_clean.back()->value, texture0->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set); + m_program->bind_uniform({ vs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set); } } - std::chrono::time_point textures_end = steady_clock::now(); + textures_end = steady_clock::now(); m_textures_upload_time += std::chrono::duration_cast(textures_end - textures_start).count(); //While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point @@ -1828,36 +1942,13 @@ bool VKGSRender::do_method(u32 cmd, u32 arg) bool VKGSRender::check_program_status() { - auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple - { - vk::render_target *surface = nullptr; + return (rsx::method_registers.shader_program_address() != 0); +} - if (!is_depth) - surface = m_rtts.get_texture_from_render_target_if_applicable(texaddr); - else - surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - - const bool dirty_framebuffer = (surface != nullptr && !m_texture_cache.test_framebuffer(texaddr)); - if (dirty_framebuffer || !surface) - { - if (is_depth && m_texture_cache.is_depth_texture(texaddr, (u32)get_texture_size(tex))) - return std::make_tuple(true, 0); - - if (dirty_framebuffer) - return std::make_tuple(false, 0); - - auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth); - if (!rsc.surface || rsc.is_depth_surface != is_depth) - return std::make_tuple(false, 0); - - surface = rsc.surface; - } - - return std::make_tuple(true, surface->native_pitch); - }; - - get_current_fragment_program(rtt_lookup_func); - if (!current_fragment_program.valid) return false; +void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) +{ + get_current_fragment_program(fs_sampler_state); + verify(HERE), current_fragment_program.valid; get_current_vertex_program(); @@ -2033,7 +2124,7 @@ bool VKGSRender::check_program_status() if (update_stencil_info_front) { - VkStencilFaceFlags face_flag = (update_stencil_info_back)? VK_STENCIL_FACE_FRONT_BIT: VK_STENCIL_FRONT_AND_BACK; + VkStencilFaceFlags face_flag = (update_stencil_info_back) ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FRONT_AND_BACK; vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask()); vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask()); @@ -2053,14 +2144,6 @@ bool VKGSRender::check_program_status() vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max()); } - return true; -} - -void VKGSRender::load_program(u32 vertex_count, u32 vertex_base) -{ - auto &vertex_program = current_vertex_program; - auto &fragment_program = current_fragment_program; - const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program); const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float)); const size_t required_mem = 512 + 8192 + fragment_buffer_sz; @@ -2733,13 +2816,7 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); m_current_command_buffer->begin(); - return result; -} + m_samplers_dirty.store(true); -void VKGSRender::notify_tile_unbound(u32 tile) -{ - //TODO: Handle texture writeback - //u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location); - //on_notify_memory_unmapped(addr, tiles[tile].size); - //m_rtts.invalidate_surface_address(addr, false); + return result; } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 6480f727ec..80f774d0fe 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -129,6 +129,16 @@ private: std::unique_ptr m_text_writer; + std::mutex m_sampler_mutex; + std::atomic_bool m_samplers_dirty = { true }; + std::array, rsx::limits::fragment_textures_count> fs_sampler_state = {}; + std::array, rsx::limits::vertex_textures_count> vs_sampler_state = {}; + std::array, rsx::limits::fragment_textures_count> fs_sampler_handles; + std::array, rsx::limits::vertex_textures_count> vs_sampler_handles; + + VkBufferView m_persistent_attribute_storage; + VkBufferView m_volatile_attribute_storage; + public: //vk::fbo draw_fbo; std::unique_ptr m_vertex_cache; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 3d66d0c099..780410af8c 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -576,7 +576,7 @@ namespace vk VkSamplerCreateInfo info = {}; sampler(VkDevice dev, VkSamplerAddressMode clamp_u, VkSamplerAddressMode clamp_v, VkSamplerAddressMode clamp_w, - bool unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod, + VkBool32 unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod, VkFilter min_filter, VkFilter mag_filter, VkSamplerMipmapMode mipmap_mode, VkBorderColor border_color, VkBool32 depth_compare = false, VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER) : m_device(dev) @@ -607,6 +607,21 @@ namespace vk vkDestroySampler(m_device, value, nullptr); } + bool matches(VkSamplerAddressMode clamp_u, VkSamplerAddressMode clamp_v, VkSamplerAddressMode clamp_w, + VkBool32 unnormalized_coordinates, float mipLodBias, float max_anisotropy, float min_lod, float max_lod, + VkFilter min_filter, VkFilter mag_filter, VkSamplerMipmapMode mipmap_mode, VkBorderColor border_color, + VkBool32 depth_compare = false, VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER) + { + if (info.magFilter != mag_filter || info.minFilter != min_filter || info.mipmapMode != mipmap_mode || + info.addressModeU != clamp_u || info.addressModeV != clamp_v || info.addressModeW != clamp_w || + info.compareEnable != depth_compare || info.unnormalizedCoordinates != unnormalized_coordinates || + info.mipLodBias != mipLodBias || info.maxAnisotropy != max_anisotropy || info.maxLod != max_lod || + info.minLod != min_lod || info.compareOp != depth_compare_mode || info.borderColor != border_color) + return false; + + return true; + } + sampler(const sampler&) = delete; sampler(sampler&&) = delete; private: diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index dbaf6c6337..29e08b8996 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -23,7 +23,7 @@ namespace vk if (memcmp(&att_state[0], &other.att_state[0], sizeof(VkPipelineColorBlendAttachmentState))) return false; - if (render_pass != other.render_pass) + if (render_pass_location != other.render_pass_location) return false; if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 33059baf24..bd7d857b68 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -47,10 +47,17 @@ namespace vk this->depth = depth; this->mipmaps = mipmaps; - uploaded_image_view.reset(view); - vram_texture = image; + if (managed) + { + managed_texture.reset(image); + uploaded_image_view.reset(view); + } + else + { + verify(HERE), uploaded_image_view.get() == nullptr; + } - if (managed) managed_texture.reset(image); + vram_texture = image; //TODO: Properly compute these values if (rsx_pitch > 0) @@ -157,15 +164,27 @@ namespace vk const u16 internal_width = std::min(width, rsx::apply_resolution_scale(width, true)); const u16 internal_height = std::min(height, rsx::apply_resolution_scale(height, true)); + VkImageAspectFlags aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; + switch (vram_texture->info.format) + { + case VK_FORMAT_D16_UNORM: + aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT; + break; + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + break; + } + VkBufferImageCopy copyRegion = {}; copyRegion.bufferOffset = 0; copyRegion.bufferRowLength = internal_width; copyRegion.bufferImageHeight = internal_height; - copyRegion.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + copyRegion.imageSubresource = {aspect_flag, 0, 0, 1}; copyRegion.imageOffset = {}; copyRegion.imageExtent = {internal_width, internal_height, 1}; - VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + VkImageSubresourceRange subresource_range = { aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 }; VkImageLayout layout = vram_texture->current_layout; change_image_layout(cmd, vram_texture, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); @@ -246,6 +265,10 @@ namespace vk bool swap_bytes = false; switch (vram_texture->info.format) { + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + //TODO: Hardware tests to determine correct memory layout + case VK_FORMAT_D16_UNORM: case VK_FORMAT_R16G16B16A16_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32_SFLOAT: @@ -312,6 +335,19 @@ namespace vk return vram_texture->info.format == tex->info.format; } + bool is_depth_texture() const + { + switch (vram_texture->info.format) + { + case VK_FORMAT_D16_UNORM: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + return true; + default: + return false; + } + } + u64 get_sync_timestamp() const { return sync_timestamp; @@ -453,7 +489,11 @@ namespace vk vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); vk::change_image_layout(cmd, source, old_src_layout, subresource_range); + const u32 resource_memory = w * h * 4; //Rough approximate m_discardable_storage.push_back({ image, view }); + m_discardable_storage.back().block_size = resource_memory; + m_discarded_memory_size += resource_memory; + return m_discardable_storage.back().view.get(); } @@ -733,7 +773,7 @@ namespace vk } template - image_view* _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts) + sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts) { return upload_texture(cmd, tex, m_rtts, *m_device, cmd, m_memory_types, const_cast(m_submit_queue)); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index b28838e4b6..03676e19be 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -237,7 +237,8 @@ VKGSRender::upload_vertex_data() auto required = calculate_memory_requirements(m_vertex_layout, vertex_count); size_t persistent_offset = UINT64_MAX, volatile_offset = UINT64_MAX; - VkBufferView persistent_view = VK_NULL_HANDLE, volatile_view = VK_NULL_HANDLE; + m_persistent_attribute_storage = VK_NULL_HANDLE; + m_volatile_attribute_storage = VK_NULL_HANDLE; if (required.first > 0) { @@ -277,11 +278,11 @@ VKGSRender::upload_vertex_data() } } - persistent_view = m_current_frame->buffer_views_to_clean.back()->value; + m_persistent_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value; } else { - persistent_view = null_buffer_view->value; + m_persistent_attribute_storage = null_buffer_view->value; } if (required.second > 0) @@ -290,16 +291,13 @@ VKGSRender::upload_vertex_data() m_current_frame->buffer_views_to_clean.push_back(std::make_unique(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8_UINT, volatile_offset, required.second)); - volatile_view = m_current_frame->buffer_views_to_clean.back()->value; + m_volatile_attribute_storage = m_current_frame->buffer_views_to_clean.back()->value; } else { - volatile_view = null_buffer_view->value; + m_volatile_attribute_storage = null_buffer_view->value; } - m_program->bind_uniform(persistent_view, "persistent_input_stream", m_current_frame->descriptor_set); - m_program->bind_uniform(volatile_view, "volatile_input_stream", m_current_frame->descriptor_set); - //Write all the data once if possible if (required.first && required.second && volatile_offset > persistent_offset) { diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 2fc07f39e4..8b3d270d87 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -469,6 +469,15 @@ namespace rsx rsx->m_textures_dirty[index] = true; } }; + + template + struct set_vertex_texture_dirty_bit + { + static void impl(thread* rsx, u32 _reg, u32 arg) + { + rsx->m_vertex_textures_dirty[index] = true; + } + }; } namespace nv308a @@ -1539,6 +1548,14 @@ namespace rsx bind_range(); bind_range(); bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); bind(); bind(); bind();