diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index a327dd9f35..455444ae4b 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -235,6 +235,8 @@ std::string FragmentProgramDecompiler::AddConst() std::string FragmentProgramDecompiler::AddTex() { + properties.has_tex_op = true; + std::string sampler; switch (m_prog.get_texture_dimension(dst.tex_num)) { @@ -251,6 +253,7 @@ std::string FragmentProgramDecompiler::AddTex() sampler = "sampler3D"; break; } + return m_parr.AddParam(PF_PARAM_UNIFORM, sampler, std::string("tex") + std::to_string(dst.tex_num)); } diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h index c88e975dab..9e76cb90f5 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h @@ -258,12 +258,14 @@ public: bool has_wpos_input = false; bool has_no_output = false; bool has_discard_op = false; + bool has_tex_op = false; } properties; struct { bool has_native_half_support = false; + bool emulate_depth_compare = false; } device_props; diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index d5c342f6d5..e017299278 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -25,6 +25,26 @@ namespace program_common "}\n\n"; } + static void insert_compare_op_vector(std::ostream& OS) + { + OS << + "bvec4 comparison_passes(vec4 a, vec4 b, uint func)\n" + "{\n" + " switch (func)\n" + " {\n" + " default:\n" + " case 0: return bvec4(false); //never\n" + " case 1: return lessThan(a, b); //less\n" + " case 2: return equal(a, b); //equal\n" + " case 3: return lessThanEqual(a, b); //lequal\n" + " case 4: return greaterThan(a, b); //greater\n" + " case 5: return notEqual(a, b); //nequal\n" + " case 6: return greaterThanEqual(a, b); //gequal\n" + " case 7: 
return bvec4(true); //always\n" + " }\n" + "}\n\n"; + } + static void insert_fog_declaration(std::ostream& OS, const std::string wide_vector_type, const std::string input_coord, bool declare = false) { std::string template_body; @@ -449,7 +469,7 @@ namespace glsl " ocol3 = " << reg3 << ";\n\n"; } - static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true) + static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true, bool emulate_pcf = false) { OS << "#define _select mix\n"; OS << "#define _saturate(x) clamp(x, 0., 1.)\n"; @@ -494,6 +514,11 @@ namespace glsl program_common::insert_compare_op(OS); + if (require_texture_ops && emulate_pcf) + { + program_common::insert_compare_op_vector(OS); + } + // NOTES: // Lowers alpha accuracy down to 2 bits, to mimic A2C banding // Alpha lower than the real threshold (e.g 0.25 for 4 samples) gets a randomized chance to make it to the lowest transparency state @@ -509,6 +534,20 @@ namespace glsl " float alpha = trunc((_sample.a + epsilon) * samples) / samples;\n" " //_sample.a = min(_sample.a, alpha);\n" // Cannot blend A2C samples naively as they are order independent! Causes background bleeding " return (alpha > 0.f);\n" + "}\n\n" + + "vec4 linear_to_srgb(vec4 cl)\n" + "{\n" + " vec4 low = cl * 12.92;\n" + " vec4 high = 1.055 * pow(cl, vec4(1. 
/ 2.4)) - 0.055;\n" + " bvec4 select = lessThan(cl, vec4(0.0031308));\n" + " return clamp(mix(high, low, select), 0., 1.);\n" + "}\n\n" + + "float srgb_to_linear(float cs)\n" + "{\n" + " if (cs <= 0.04045) return cs / 12.92;\n" + " return pow((cs + 0.055) / 1.055, 2.4);\n" "}\n\n"; if (require_depth_conversion) @@ -518,7 +557,7 @@ namespace glsl OS << "vec4 decodeLinearDepth(float depth_value)\n" "{\n" - " uint value = uint(depth_value * 16777215);\n" + " uint value = uint(depth_value * 16777215.);\n" " uint b = (value & 0xff);\n" " uint g = (value >> 8) & 0xff;\n" " uint r = (value >> 16) & 0xff;\n" @@ -557,20 +596,24 @@ namespace glsl if (require_texture_ops) { - OS << - "vec4 linear_to_srgb(vec4 cl)\n" - "{\n" - " vec4 low = cl * 12.92;\n" - " vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n" - " bvec4 select = lessThan(cl, vec4(0.0031308));\n" - " return clamp(mix(high, low, select), 0., 1.);\n" - "}\n\n" + if (emulate_pcf) + { + OS << + "vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n" + "{\n" + " vec4 samples = textureGather(tex, p.xy).xxxx;\n" + " vec4 ref = clamp(p.z, 0., 1.).xxxx;\n" + " vec4 filtered = vec4(comparison_passes(samples, ref, func));\n" + " return filtered * dot(filtered, vec4(0.25f));\n" + "}\n\n" - "float srgb_to_linear(float cs)\n" - "{\n" - " if (cs <= 0.04045) return cs / 12.92;\n" - " return pow((cs + 0.055) / 1.055, 2.4);\n" - "}\n\n" + "vec4 shadowCompareProj(sampler2D tex, vec4 p, uint func)\n" + "{\n" + " return shadowCompare(tex, p.xyz / p.w, func);\n" + "}\n\n"; + } + + OS << #ifdef __APPLE__ "vec4 remap_vector(vec4 rgba, uint remap_bits)\n" @@ -592,7 +635,7 @@ namespace glsl " uint remap_bits = (control_bits >> 16) & 0xFFFF;\n" " if (remap_bits != 0x8D5) rgba = remap_vector(rgba, remap_bits);\n\n" #endif - " if ((control_bits & 0xFFFF) == 0) return rgba;\n\n" + " if ((control_bits & 0xFF) == 0) return rgba;\n\n" " if ((control_bits & 0x10) > 0)\n" " {\n" " //Alphakill\n" @@ -626,10 +669,22 @@ namespace glsl "#define 
TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].xy, dpdx, dpdy), floatBitsToUint(texture_parameters[index].w))\n" "#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.)), floatBitsToUint(texture_parameters[index].w))\n" - "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n" - "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.))\n" - "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.))\n" + "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n"; + if (emulate_pcf) + { + OS << + "#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n" + "#define TEX2D_SHADOWPROJ(index, coord4) shadowCompareProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n"; + } + else + { + OS << + "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.))\n" + "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.))\n"; + } + + OS << "#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), floatBitsToUint(texture_parameters[index].w))\n" "#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), 
floatBitsToUint(texture_parameters[index].w))\n" "#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), floatBitsToUint(texture_parameters[index].w))\n" diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 5d04207b41..da83a6dc0b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -42,7 +42,7 @@ D3D12_SAMPLER_DESC get_sampler_desc(const rsx::fragment_texture &texture) samplerDesc.AddressU = get_texture_wrap_mode(texture.wrap_s()); samplerDesc.AddressV = get_texture_wrap_mode(texture.wrap_t()); samplerDesc.AddressW = get_texture_wrap_mode(texture.wrap_r()); - samplerDesc.ComparisonFunc = get_sampler_compare_func[texture.zfunc()]; + samplerDesc.ComparisonFunc = get_sampler_compare_func[static_cast<u8>(texture.zfunc())]; samplerDesc.MaxAnisotropy = get_texture_max_aniso(texture.max_aniso()); samplerDesc.MipLODBias = texture.bias(); samplerDesc.BorderColor[0] = (FLOAT)texture.border_color(); diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index a2bd6ad86c..6736753ad3 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -196,7 +196,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, + m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare); } void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp index a4c2b9cecd..220d9d815d 100644 --- a/rpcs3/Emu/RSX/RSXTexture.cpp 
+++ b/rpcs3/Emu/RSX/RSXTexture.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "Emu/Memory/vm.h" #include "RSXThread.h" #include "RSXTexture.h" @@ -93,16 +93,16 @@ namespace rsx return rsx::to_texture_wrap_mode((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 16) & 0xf); } + rsx::comparison_function fragment_texture::zfunc() const + { + return static_cast<rsx::comparison_function>((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf); + } + u8 fragment_texture::unsigned_remap() const { return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf); } - u8 fragment_texture::zfunc() const - { - return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf); - } - u8 fragment_texture::gamma() const { return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 20) & 0xf); diff --git a/rpcs3/Emu/RSX/RSXTexture.h b/rpcs3/Emu/RSX/RSXTexture.h index 2d02a29dc5..16a0136779 100644 --- a/rpcs3/Emu/RSX/RSXTexture.h +++ b/rpcs3/Emu/RSX/RSXTexture.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "GCM.h" namespace rsx @@ -51,8 +51,8 @@ namespace rsx rsx::texture_wrap_mode wrap_s() const; rsx::texture_wrap_mode wrap_t() const; rsx::texture_wrap_mode wrap_r() const; + rsx::comparison_function zfunc() const; u8 unsigned_remap() const; - u8 zfunc() const; u8 gamma() const; u8 aniso_bias() const; u8 signed_remap() const; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index fde8123967..5332ee6a30 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1555,11 +1555,14 @@ namespace rsx case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { - const auto compare_mode = (rsx::comparison_function)tex.zfunc(); + const auto compare_mode = tex.zfunc(); if (result.textures_alpha_kill[i] == 0 && compare_mode < rsx::comparison_function::always && compare_mode > rsx::comparison_function::never) + { result.shadow_textures |= (1 << i); + texture_control |= u32(tex.zfunc()) 
<< 8; + } break; } default: @@ -1652,7 +1655,7 @@ namespace rsx if (tex.alpha_kill_enabled()) { //alphakill can be ignored unless a valid comparison function is set - const rsx::comparison_function func = (rsx::comparison_function)tex.zfunc(); + const auto func = tex.zfunc(); if (func < rsx::comparison_function::always && func > rsx::comparison_function::never) { result.textures_alpha_kill[i] = 1; @@ -1708,7 +1711,7 @@ namespace rsx case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { - const auto compare_mode = (rsx::comparison_function)tex.zfunc(); + const auto compare_mode = tex.zfunc(); if (result.textures_alpha_kill[i] == 0 && compare_mode < rsx::comparison_function::always && compare_mode > rsx::comparison_function::never) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index dcde6fda12..076030cfbc 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -132,7 +132,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) const auto mask = (1 << index); - if (m_prog.shadow_textures & mask) + if (!device_props.emulate_depth_compare && m_prog.shadow_textures & mask) { if (m_shadow_sampled_textures & mask) { @@ -228,7 +228,8 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input); + glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, + m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare); } void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) @@ -417,11 +418,13 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) std::string source; 
VKFragmentDecompilerThread decompiler(source, parr, prog, size, *this); + const auto pdev = vk::get_current_renderer(); if (!g_cfg.video.disable_native_float16) { - decompiler.device_props.has_native_half_support = vk::get_current_renderer()->get_shader_types_support().allow_float16; + decompiler.device_props.has_native_half_support = pdev->get_shader_types_support().allow_float16; } + decompiler.device_props.emulate_depth_compare = !pdev->get_formats_support().d24_unorm_s8; decompiler.Task(); shader.create(::glsl::program_domain::glsl_fragment_program, source); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index dd15891fa7..170619c7ed 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1503,29 +1503,69 @@ void VKGSRender::end() if (rsx::method_registers.fragment_textures[i].enabled()) { check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); - *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); - const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 || - texture_format == CELL_GCM_TEXTURE_DEPTH16_FLOAT || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT); - VkCompareOp depth_compare_mode = compare_enabled ? 
vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER; - bool replace = !fs_sampler_handles[i]; - VkFilter min_filter; + VkFilter min_filter, mag_filter; VkSamplerMipmapMode mip_mode; f32 min_lod = 0.f, max_lod = 0.f; f32 lod_bias = 0.f; + const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); + VkBool32 compare_enabled = VK_FALSE; + VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER; + + if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT) + { + if (m_device->get_formats_support().d24_unorm_s8) + { + // NOTE: + // The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3 + // In case of projected shadow lookup the result of the divide operation has its Z clamped to [0-1] before comparison + // Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage + // Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results + + // NOTE2: + // To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available + + compare_enabled = VK_TRUE; + depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true); + } + } + const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0; const f32 af_level = aniso_override ? 
g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso()); const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()); const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()); const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()); - const auto mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()); const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()); - std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter()); + // Check if non-point filtering can even be used on this format + bool can_sample_linear; + if (LIKELY(!sampler_state->is_depth_texture)) + { + // Most PS3-like formats can be linearly filtered without problem + can_sample_linear = true; + } + else + { + // Not all GPUs support linear filtering of depth formats + const auto vk_format = sampler_state->image_handle ? 
sampler_state->image_handle->image()->format() : + vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format); + + can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } + + if (can_sample_linear) + { + mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()); + std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter()); + } + else + { + mag_filter = min_filter = VK_FILTER_NEAREST; + mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + } if (sampler_state->upload_context == rsx::texture_upload_context::shader_read && rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1) @@ -1576,6 +1616,7 @@ void VKGSRender::end() if (rsx::method_registers.vertex_textures[i].enabled()) { + check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts); bool replace = !vs_sampler_handles[i]; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index cb9dc18035..b9d8ee976c 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -69,8 +69,9 @@ namespace vk memory_type_mapping get_memory_mapping(const vk::physical_device& dev) { + VkPhysicalDevice pdev = dev; VkPhysicalDeviceMemoryProperties memory_properties; - vkGetPhysicalDeviceMemoryProperties((VkPhysicalDevice&)dev, &memory_properties); + vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties); memory_type_mapping result; result.device_local = VK_MAX_MEMORY_TYPES; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 4c03e3d2ed..f694579bf6 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -404,7 +404,8 @@ namespace vk class physical_device { - 
VkPhysicalDevice dev = nullptr; + VkInstance parent = VK_NULL_HANDLE; + VkPhysicalDevice dev = VK_NULL_HANDLE; VkPhysicalDeviceProperties props; VkPhysicalDeviceMemoryProperties memory_properties; std::vector<VkQueueFamilyProperties> queue_props; @@ -414,9 +415,10 @@ namespace vk physical_device() {} ~physical_device() {} - void set_device(VkPhysicalDevice pdev) + void create(VkInstance context, VkPhysicalDevice pdev) { dev = pdev; + parent = context; vkGetPhysicalDeviceProperties(pdev, &props); vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties); @@ -514,6 +516,11 @@ namespace vk { return dev; } + + operator VkInstance() const + { + return parent; + } }; class supported_extensions @@ -565,6 +572,7 @@ namespace vk { physical_device *pgpu = nullptr; memory_type_mapping memory_map{}; + std::unordered_map<VkFormat, VkFormatProperties> m_format_properties; gpu_formats_support m_formats_support{}; gpu_shader_types_support m_shader_types_support{}; std::unique_ptr m_allocator; @@ -572,27 +580,31 @@ namespace vk void get_physical_device_features(VkPhysicalDeviceFeatures& features) { - if (!vkGetPhysicalDeviceFeatures2) + supported_extensions instance_extensions(supported_extensions::instance); + + if (!instance_extensions.is_supported("VK_KHR_get_physical_device_properties2")) { vkGetPhysicalDeviceFeatures(*pgpu, &features); } else { - supported_extensions extension_support(supported_extensions::device, nullptr, pgpu); + supported_extensions device_extensions(supported_extensions::device, nullptr, pgpu); - VkPhysicalDeviceFeatures2 features2; + VkPhysicalDeviceFeatures2KHR features2; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = nullptr; VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{}; - if (extension_support.is_supported("VK_KHR_shader_float16_int8")) + if (device_extensions.is_supported("VK_KHR_shader_float16_int8")) { shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; features2.pNext = &shader_support_info; } - 
vkGetPhysicalDeviceFeatures2(*pgpu, &features2); + auto getPhysicalDeviceFeatures2KHR = (PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr(*pgpu, "vkGetPhysicalDeviceFeatures2KHR"); + verify("vkGetInstanceProcAddress failed to find entry point!" HERE), getPhysicalDeviceFeatures2KHR; + getPhysicalDeviceFeatures2KHR(*pgpu, &features2); m_shader_types_support.allow_float16 = !!shader_support_info.shaderFloat16; m_shader_types_support.allow_int8 = !!shader_support_info.shaderInt8; @@ -696,6 +708,19 @@ namespace vk } } + const VkFormatProperties get_format_properties(VkFormat format) + { + auto found = m_format_properties.find(format); + if (found != m_format_properties.end()) + { + return found->second; + } + + auto& props = m_format_properties[format]; + vkGetPhysicalDeviceFormatProperties(*pgpu, format, &props); + return props; + } + bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32 *type_index) const { VkPhysicalDeviceMemoryProperties mem_infos = pgpu->get_memory_properties(); @@ -2266,6 +2291,11 @@ public: { extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); } + + if (support.is_supported("VK_KHR_get_physical_device_properties2")) + { + extensions.push_back("VK_KHR_get_physical_device_properties2"); + } #ifdef _WIN32 extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); #elif defined(__APPLE__) @@ -2357,7 +2387,7 @@ public: CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, pdevs.data())); for (u32 i = 0; i < num_gpus; ++i) - gpus[i].set_device(pdevs[i]); + gpus[i].create(m_instance, pdevs[i]); } return gpus;