rsx: Improve accuracy of shadow compare Ops when non-integer depth formats are used

- The fixed-point D24S8 format does special Z clamping during compare, which matches PS3 behaviour.
- D32S8 is a floating-point format, and comparison with Dref > 1 always fails, causing black edges/borders.
2025-07-06 15:01:28 +12:00 · 2019-04-15 20:39:42 +03:00 · 2019-04-15 20:39:42 +03:00 · 463b1b220d
commit 463b1b220d
parent 7ad1646c2c
12 changed files with 192 additions and 53 deletions
--- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
+++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
@ -235,6 +235,8 @@ std::string FragmentProgramDecompiler::AddConst()

 std::string FragmentProgramDecompiler::AddTex()
 {
+	properties.has_tex_op = true;
+
 	std::string sampler;
 	switch (m_prog.get_texture_dimension(dst.tex_num))
 	{
@ -251,6 +253,7 @@ std::string FragmentProgramDecompiler::AddTex()
 		sampler = "sampler3D";
 		break;
 	}
+
 	return m_parr.AddParam(PF_PARAM_UNIFORM, sampler, std::string("tex") + std::to_string(dst.tex_num));
 }

--- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h
+++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h
@ -258,12 +258,14 @@ public:
 		bool has_wpos_input = false;
 		bool has_no_output = false;
 		bool has_discard_op = false;
+		bool has_tex_op = false;
 	}
 	properties;

 	struct
 	{
 		bool has_native_half_support = false;
+		bool emulate_depth_compare = false;
 	}
 	device_props;

--- a/rpcs3/Emu/RSX/Common/GLSLCommon.h
+++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h
@ -25,6 +25,26 @@ namespace program_common
 		"}\n\n";
 	}

+	static void insert_compare_op_vector(std::ostream& OS) // Writes the GLSL helper comparison_passes() to OS: a component-wise vec4 comparison whose operator is selected by func
+	{
+		OS <<
+		"bvec4 comparison_passes(vec4 a, vec4 b, uint func)\n"
+		"{\n"
+		"	switch (func)\n"
+		"	{\n"
+		"		default:\n" // Any func value outside 0..7 falls through to case 0 and never passes
+		"		case 0: return bvec4(false); //never\n"
+		"		case 1: return lessThan(a, b); //less\n"
+		"		case 2: return equal(a, b); //equal\n"
+		"		case 3: return lessThanEqual(a, b); //lequal\n"
+		"		case 4: return greaterThan(a, b); //greater\n"
+		"		case 5: return notEqual(a, b); //nequal\n"
+		"		case 6: return greaterThanEqual(a, b); //gequal\n"
+		"		case 7: return bvec4(true); //always\n"
+		"	}\n"
+		"}\n\n";
+	}
+
 	static void insert_fog_declaration(std::ostream& OS, const std::string wide_vector_type, const std::string input_coord, bool declare = false)
 	{
 		std::string template_body;
@ -449,7 +469,7 @@ namespace glsl
 		"	ocol3 = " << reg3 << ";\n\n";
 	}

-	static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true)
+	static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true, bool emulate_pcf = false)
 	{
 		OS << "#define _select mix\n";
 		OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
@ -494,6 +514,11 @@ namespace glsl

 		program_common::insert_compare_op(OS);

+		if (require_texture_ops && emulate_pcf)
+		{
+			program_common::insert_compare_op_vector(OS);
+		}
+
 		// NOTES:
 		// Lowers alpha accuracy down to 2 bits, to mimic A2C banding
 		// Alpha lower than the real threshold (e.g 0.25 for 4 samples) gets a randomized chance to make it to the lowest transparency state
@ -509,6 +534,20 @@ namespace glsl
 		"	float alpha   = trunc((_sample.a + epsilon) * samples) / samples;\n"
 		"	//_sample.a     = min(_sample.a, alpha);\n" // Cannot blend A2C samples naively as they are order independent! Causes background bleeding
 		"	return (alpha > 0.f);\n"
+		"}\n\n"
+
+		"vec4 linear_to_srgb(vec4 cl)\n"
+		"{\n"
+		"	vec4 low = cl * 12.92;\n"
+		"	vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n"
+		"	bvec4 select = lessThan(cl, vec4(0.0031308));\n"
+		"	return clamp(mix(high, low, select), 0., 1.);\n"
+		"}\n\n"
+
+		"float srgb_to_linear(float cs)\n"
+		"{\n"
+		"	if (cs <= 0.04045) return cs / 12.92;\n"
+		"	return pow((cs + 0.055) / 1.055, 2.4);\n"
 		"}\n\n";

 		if (require_depth_conversion)
@ -518,7 +557,7 @@ namespace glsl
 			OS <<
 			"vec4 decodeLinearDepth(float depth_value)\n"
 			"{\n"
-			"	uint value = uint(depth_value * 16777215);\n"
+			"	uint value = uint(depth_value * 16777215.);\n"
 			"	uint b = (value & 0xff);\n"
 			"	uint g = (value >> 8) & 0xff;\n"
 			"	uint r = (value >> 16) & 0xff;\n"
@ -557,20 +596,24 @@ namespace glsl

 		if (require_texture_ops)
 		{
-			OS <<
-			"vec4 linear_to_srgb(vec4 cl)\n"
-			"{\n"
-			"	vec4 low = cl * 12.92;\n"
-			"	vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n"
-			"	bvec4 select = lessThan(cl, vec4(0.0031308));\n"
-			"	return clamp(mix(high, low, select), 0., 1.);\n"
-			"}\n\n"
+			if (emulate_pcf)
+			{
+				OS <<
+				"vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n"
+				"{\n"
+				"	vec4 samples = textureGather(tex, p.xy).xxxx;\n"
+				"	vec4 ref = clamp(p.z, 0., 1.).xxxx;\n"
+				"	vec4 filtered = vec4(comparison_passes(samples, ref, func));\n"
+				"	return filtered * dot(filtered, vec4(0.25f));\n"
+				"}\n\n"

-			"float srgb_to_linear(float cs)\n"
-			"{\n"
-			"	if (cs <= 0.04045) return cs / 12.92;\n"
-			"	return pow((cs + 0.055) / 1.055, 2.4);\n"
-			"}\n\n"
+				"vec4 shadowCompareProj(sampler2D tex, vec4 p, uint func)\n"
+				"{\n"
+				"	return shadowCompare(tex, p.xyz / p.w, func);\n"
+				"}\n\n";
+			}
+
+			OS <<

 #ifdef __APPLE__
 			"vec4 remap_vector(vec4 rgba, uint remap_bits)\n"
@ -592,7 +635,7 @@ namespace glsl
 			"	uint remap_bits = (control_bits >> 16) & 0xFFFF;\n"
 			"	if (remap_bits != 0x8D5) rgba = remap_vector(rgba, remap_bits);\n\n"
 #endif
-			"	if ((control_bits & 0xFFFF) == 0) return rgba;\n\n"
+			"	if ((control_bits & 0xFF) == 0) return rgba;\n\n"
 			"	if ((control_bits & 0x10) > 0)\n"
 			"	{\n"
 			"		//Alphakill\n"
@ -626,10 +669,22 @@ namespace glsl
 			"#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].xy, dpdx, dpdy), floatBitsToUint(texture_parameters[index].w))\n"
 			"#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.)), floatBitsToUint(texture_parameters[index].w))\n"

-			"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n"
-			"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.))\n"
-			"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.))\n"
+			"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n";

+			if (emulate_pcf)
+			{
+				OS <<
+				"#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n"
+				"#define TEX2D_SHADOWPROJ(index, coord4) shadowCompareProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n";
+			}
+			else
+			{
+				OS <<
+				"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.))\n"
+				"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.))\n";
+			}
+
+			OS <<
 			"#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), floatBitsToUint(texture_parameters[index].w))\n"
 			"#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), floatBitsToUint(texture_parameters[index].w))\n"
 			"#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), floatBitsToUint(texture_parameters[index].w))\n"
--- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp
@ -42,7 +42,7 @@ D3D12_SAMPLER_DESC get_sampler_desc(const rsx::fragment_texture &texture)
 	samplerDesc.AddressU = get_texture_wrap_mode(texture.wrap_s());
 	samplerDesc.AddressV = get_texture_wrap_mode(texture.wrap_t());
 	samplerDesc.AddressW = get_texture_wrap_mode(texture.wrap_r());
-	samplerDesc.ComparisonFunc = get_sampler_compare_func[texture.zfunc()];
+	samplerDesc.ComparisonFunc = get_sampler_compare_func[static_cast<u8>(texture.zfunc())];
 	samplerDesc.MaxAnisotropy = get_texture_max_aniso(texture.max_aniso());
 	samplerDesc.MipLODBias = texture.bias();
 	samplerDesc.BorderColor[0] = (FLOAT)texture.border_color();
--- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp
+++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp
@ -196,7 +196,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)

 void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
 {
-	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
+	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
+		m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
 }

 void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
--- a/rpcs3/Emu/RSX/RSXTexture.cpp
+++ b/rpcs3/Emu/RSX/RSXTexture.cpp
@ -1,4 +1,4 @@
-#include "stdafx.h"
+#include "stdafx.h"
 #include "Emu/Memory/vm.h"
 #include "RSXThread.h"
 #include "RSXTexture.h"
@ -93,16 +93,16 @@ namespace rsx
 		return rsx::to_texture_wrap_mode((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 16) & 0xf);
 	}

+	rsx::comparison_function fragment_texture::zfunc() const // Comparison function of this texture unit, returned as the typed enum instead of a raw u8
+	{
+		return static_cast<rsx::comparison_function>((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf); // Bits 28..31 of the NV4097_SET_TEXTURE_ADDRESS word for unit m_index
+	}
+
 	u8 fragment_texture::unsigned_remap() const
 	{
 		return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf);
 	}

-	u8 fragment_texture::zfunc() const
-	{
-		return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf);
-	}
-
 	u8 fragment_texture::gamma() const
 	{
 		return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 20) & 0xf);
--- a/rpcs3/Emu/RSX/RSXTexture.h
+++ b/rpcs3/Emu/RSX/RSXTexture.h
@ -1,4 +1,4 @@
-#pragma once
+#pragma once
 #include "GCM.h"

 namespace rsx
@ -51,8 +51,8 @@ namespace rsx
 		rsx::texture_wrap_mode wrap_s() const;
 		rsx::texture_wrap_mode wrap_t() const;
 		rsx::texture_wrap_mode wrap_r() const;
+		rsx::comparison_function zfunc() const;
 		u8 unsigned_remap() const;
-		u8 zfunc() const;
 		u8 gamma() const;
 		u8 aniso_bias() const;
 		u8 signed_remap() const;
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@ -1555,11 +1555,14 @@ namespace rsx
 					case CELL_GCM_TEXTURE_DEPTH24_D8:
 					case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
 					{
-						const auto compare_mode = (rsx::comparison_function)tex.zfunc();
+						const auto compare_mode = tex.zfunc();
 						if (result.textures_alpha_kill[i] == 0 &&
 							compare_mode < rsx::comparison_function::always &&
 							compare_mode > rsx::comparison_function::never)
+						{
 							result.shadow_textures |= (1 << i);
+							texture_control |= u32(tex.zfunc()) << 8;
+						}
 						break;
 					}
 					default:
@ -1652,7 +1655,7 @@ namespace rsx
 				if (tex.alpha_kill_enabled())
 				{
 					//alphakill can be ignored unless a valid comparison function is set
-					const rsx::comparison_function func = (rsx::comparison_function)tex.zfunc();
+					const auto func = tex.zfunc();
 					if (func < rsx::comparison_function::always && func > rsx::comparison_function::never)
 					{
 						result.textures_alpha_kill[i] = 1;
@ -1708,7 +1711,7 @@ namespace rsx
 						case CELL_GCM_TEXTURE_DEPTH24_D8:
 						case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
 						{
-							const auto compare_mode = (rsx::comparison_function)tex.zfunc();
+							const auto compare_mode = tex.zfunc();
 							if (result.textures_alpha_kill[i] == 0 &&
 								compare_mode < rsx::comparison_function::always &&
 								compare_mode > rsx::comparison_function::never)
--- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
+++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
@ -132,7 +132,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)

 			const auto mask = (1 << index);

-			if (m_prog.shadow_textures & mask)
+			if (!device_props.emulate_depth_compare && m_prog.shadow_textures & mask)
 			{
 				if (m_shadow_sampled_textures & mask)
 				{
@ -228,7 +228,8 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)

 void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
 {
-	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
+	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
+		m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
 }

 void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -417,11 +418,13 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
 	std::string source;
 	VKFragmentDecompilerThread decompiler(source, parr, prog, size, *this);

+	const auto pdev = vk::get_current_renderer();
 	if (!g_cfg.video.disable_native_float16)
 	{
-		decompiler.device_props.has_native_half_support = vk::get_current_renderer()->get_shader_types_support().allow_float16;
+		decompiler.device_props.has_native_half_support = pdev->get_shader_types_support().allow_float16;
 	}

+	decompiler.device_props.emulate_depth_compare = !pdev->get_formats_support().d24_unorm_s8;
 	decompiler.Task();

 	shader.create(::glsl::program_domain::glsl_fragment_program, source);
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@ -1503,29 +1503,69 @@ void VKGSRender::end()
 				if (rsx::method_registers.fragment_textures[i].enabled())
 				{
 					check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
-
 					*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);

-					const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
-					const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 ||
-							texture_format == CELL_GCM_TEXTURE_DEPTH16_FLOAT || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT);
-					VkCompareOp depth_compare_mode = compare_enabled ? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER;
-
 					bool replace = !fs_sampler_handles[i];
-					VkFilter min_filter;
+					VkFilter min_filter, mag_filter;
 					VkSamplerMipmapMode mip_mode;
 					f32 min_lod = 0.f, max_lod = 0.f;
 					f32 lod_bias = 0.f;

+					const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
+					VkBool32 compare_enabled = VK_FALSE;
+					VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER;
+
+					if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT)
+					{
+						if (m_device->get_formats_support().d24_unorm_s8)
+						{
+							// NOTE:
+							// The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3
+							// In case of projected shadow lookup the result of the divide operation has its Z clamped to [0-1] before comparison
+							// Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage
+							// Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results
+
+							// NOTE2:
+							// To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available
+
+							compare_enabled = VK_TRUE;
+							depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true);
+						}
+					}
+
 					const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0;
 					const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
 					const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s());
 					const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t());
 					const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r());
-					const auto mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
 					const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color());

-					std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
+					// Check if non-point filtering can even be used on this format
+					bool can_sample_linear;
+					if (LIKELY(!sampler_state->is_depth_texture))
+					{
+						// Most PS3-like formats can be linearly filtered without problem
+						can_sample_linear = true;
+					}
+					else
+					{
+						// Not all GPUs support linear filtering of depth formats
+						const auto vk_format = sampler_state->image_handle ? sampler_state->image_handle->image()->format() :
+							vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format);
+
+						can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+					}
+
+					if (can_sample_linear)
+					{
+						mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
+						std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
+					}
+					else
+					{
+						mag_filter = min_filter = VK_FILTER_NEAREST;
+						mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
+					}

 					if (sampler_state->upload_context == rsx::texture_upload_context::shader_read &&
 						rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
@ -1576,6 +1616,7 @@ void VKGSRender::end()

 				if (rsx::method_registers.vertex_textures[i].enabled())
 				{
+					check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
 					*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);

 					bool replace = !vs_sampler_handles[i];
--- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp
@ -69,8 +69,9 @@ namespace vk

 	memory_type_mapping get_memory_mapping(const vk::physical_device& dev)
 	{
+		VkPhysicalDevice pdev = dev;
 		VkPhysicalDeviceMemoryProperties memory_properties;
-		vkGetPhysicalDeviceMemoryProperties((VkPhysicalDevice&)dev, &memory_properties);
+		vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);

 		memory_type_mapping result;
 		result.device_local = VK_MAX_MEMORY_TYPES;
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@ -404,7 +404,8 @@ namespace vk

 	class physical_device
 	{
-		VkPhysicalDevice dev = nullptr;
+		VkInstance parent = VK_NULL_HANDLE;
+		VkPhysicalDevice dev = VK_NULL_HANDLE;
 		VkPhysicalDeviceProperties props;
 		VkPhysicalDeviceMemoryProperties memory_properties;
 		std::vector<VkQueueFamilyProperties> queue_props;
@ -414,9 +415,10 @@ namespace vk
 		physical_device() {}
 		~physical_device() {}

-		void set_device(VkPhysicalDevice pdev)
+		void create(VkInstance context, VkPhysicalDevice pdev)
 		{
 			dev = pdev;
+			parent = context;
 			vkGetPhysicalDeviceProperties(pdev, &props);
 			vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);

@ -514,6 +516,11 @@ namespace vk
 		{
 			return dev;
 		}
+
+		operator VkInstance() const // Implicit conversion to the VkInstance stored in parent, which create() sets from its context argument
+		{
+			return parent;
+		}
 	};

 	class supported_extensions
@ -565,6 +572,7 @@ namespace vk
 	{
 		physical_device *pgpu = nullptr;
 		memory_type_mapping memory_map{};
+		std::unordered_map<VkFormat, VkFormatProperties> m_format_properties;
 		gpu_formats_support m_formats_support{};
 		gpu_shader_types_support m_shader_types_support{};
 		std::unique_ptr<mem_allocator_base> m_allocator;
@ -572,27 +580,31 @@ namespace vk

 		void get_physical_device_features(VkPhysicalDeviceFeatures& features)
 		{
-			if (!vkGetPhysicalDeviceFeatures2)
+			supported_extensions instance_extensions(supported_extensions::instance);
+
+			if (!instance_extensions.is_supported("VK_KHR_get_physical_device_properties2"))
 			{
 				vkGetPhysicalDeviceFeatures(*pgpu, &features);
 			}
 			else
 			{
-				supported_extensions extension_support(supported_extensions::device, nullptr, pgpu);
+				supported_extensions device_extensions(supported_extensions::device, nullptr, pgpu);

-				VkPhysicalDeviceFeatures2 features2;
+				VkPhysicalDeviceFeatures2KHR features2;
 				features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
 				features2.pNext = nullptr;

 				VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};

-				if (extension_support.is_supported("VK_KHR_shader_float16_int8"))
+				if (device_extensions.is_supported("VK_KHR_shader_float16_int8"))
 				{
 					shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
 					features2.pNext = &shader_support_info;
 				}

-				vkGetPhysicalDeviceFeatures2(*pgpu, &features2);
+				auto getPhysicalDeviceFeatures2KHR = (PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr(*pgpu, "vkGetPhysicalDeviceFeatures2KHR");
+				verify("vkGetInstanceProcAddress failed to find entry point!" HERE), getPhysicalDeviceFeatures2KHR;
+				getPhysicalDeviceFeatures2KHR(*pgpu, &features2);

 				m_shader_types_support.allow_float16 = !!shader_support_info.shaderFloat16;
 				m_shader_types_support.allow_int8 = !!shader_support_info.shaderInt8;
@ -696,6 +708,19 @@ namespace vk
 			}
 		}

+		const VkFormatProperties get_format_properties(VkFormat format) // Returns a copy of the VkFormatProperties for format, memoized per format in m_format_properties
+		{
+			auto found = m_format_properties.find(format);
+			if (found != m_format_properties.end())
+			{
+				return found->second; // Cache hit: no Vulkan query is issued
+			}
+
+			auto& props = m_format_properties[format]; // Cache miss: operator[] inserts a value-initialized entry that the query below fills in place
+			vkGetPhysicalDeviceFormatProperties(*pgpu, format, &props);
+			return props;
+		}
+
 		bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32 *type_index) const
 		{
 			VkPhysicalDeviceMemoryProperties mem_infos = pgpu->get_memory_properties();
@ -2266,6 +2291,11 @@ public:
 				{
 					extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
 				}
+
+				if (support.is_supported("VK_KHR_get_physical_device_properties2"))
+				{
+					extensions.push_back("VK_KHR_get_physical_device_properties2");
+				}
 #ifdef _WIN32
 				extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
 #elif defined(__APPLE__)
@ -2357,7 +2387,7 @@ public:
 				CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, pdevs.data()));

 				for (u32 i = 0; i < num_gpus; ++i)
-					gpus[i].set_device(pdevs[i]);
+					gpus[i].create(m_instance, pdevs[i]);
 			}

 			return gpus;