mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-06 15:01:28 +12:00
rsx: Improve accuracy of shadow compare Ops when non-integer depth formats are used
- The fixed-point D24S8 format does special Z clamping during compare, which matches PS3 behaviour.
- D32S8 is a floating-point format, and comparison with Dref > 1 always fails, causing black edges/borders.
This commit is contained in:
parent
7ad1646c2c
commit
463b1b220d
12 changed files with 192 additions and 53 deletions
|
@ -235,6 +235,8 @@ std::string FragmentProgramDecompiler::AddConst()
|
|||
|
||||
std::string FragmentProgramDecompiler::AddTex()
|
||||
{
|
||||
properties.has_tex_op = true;
|
||||
|
||||
std::string sampler;
|
||||
switch (m_prog.get_texture_dimension(dst.tex_num))
|
||||
{
|
||||
|
@ -251,6 +253,7 @@ std::string FragmentProgramDecompiler::AddTex()
|
|||
sampler = "sampler3D";
|
||||
break;
|
||||
}
|
||||
|
||||
return m_parr.AddParam(PF_PARAM_UNIFORM, sampler, std::string("tex") + std::to_string(dst.tex_num));
|
||||
}
|
||||
|
||||
|
|
|
@ -258,12 +258,14 @@ public:
|
|||
bool has_wpos_input = false;
|
||||
bool has_no_output = false;
|
||||
bool has_discard_op = false;
|
||||
bool has_tex_op = false;
|
||||
}
|
||||
properties;
|
||||
|
||||
// Host-device capability flags; in this diff they are assigned in
// VKFragmentProgram::Decompile before decompiler.Task() is called.
struct
|
||||
{
|
||||
// Set from the renderer's shader-type support (allow_float16), and only
// when g_cfg.video.disable_native_float16 is not set.
bool has_native_half_support = false;
|
||||
// Set to true when the device reports no d24_unorm_s8 format support.
// Forwarded as the `emulate_pcf` argument of insert_glsl_legacy_function,
// which then emits shader-side shadow comparison helpers.
bool emulate_depth_compare = false;
|
||||
}
|
||||
device_props;
|
||||
|
||||
|
|
|
@ -25,6 +25,26 @@ namespace program_common
|
|||
"}\n\n";
|
||||
}
|
||||
|
||||
// Writes the GLSL helper `comparison_passes(vec4 a, vec4 b, uint func)` to OS.
// The emitted function performs a component-wise comparison of `a` against `b`
// and returns the per-component result as a bvec4. `func` selects the operator:
//   0 never, 1 less, 2 equal, 3 lequal, 4 greater, 5 nequal, 6 gequal, 7 always.
// Any other value hits `default:` and falls through to case 0 (all false).
// In this diff the helper is only emitted when texture ops are required and
// PCF emulation is enabled; the emitted shadowCompare() GLSL function calls it.
static void insert_compare_op_vector(std::ostream& OS)
|
||||
{
|
||||
OS <<
|
||||
"bvec4 comparison_passes(vec4 a, vec4 b, uint func)\n"
|
||||
"{\n"
|
||||
" switch (func)\n"
|
||||
" {\n"
|
||||
" default:\n"
|
||||
" case 0: return bvec4(false); //never\n"
|
||||
" case 1: return lessThan(a, b); //less\n"
|
||||
" case 2: return equal(a, b); //equal\n"
|
||||
" case 3: return lessThanEqual(a, b); //lequal\n"
|
||||
" case 4: return greaterThan(a, b); //greater\n"
|
||||
" case 5: return notEqual(a, b); //nequal\n"
|
||||
" case 6: return greaterThanEqual(a, b); //gequal\n"
|
||||
" case 7: return bvec4(true); //always\n"
|
||||
" }\n"
|
||||
"}\n\n";
|
||||
}
|
||||
|
||||
static void insert_fog_declaration(std::ostream& OS, const std::string wide_vector_type, const std::string input_coord, bool declare = false)
|
||||
{
|
||||
std::string template_body;
|
||||
|
@ -449,7 +469,7 @@ namespace glsl
|
|||
" ocol3 = " << reg3 << ";\n\n";
|
||||
}
|
||||
|
||||
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true)
|
||||
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true, bool emulate_pcf = false)
|
||||
{
|
||||
OS << "#define _select mix\n";
|
||||
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
|
||||
|
@ -494,6 +514,11 @@ namespace glsl
|
|||
|
||||
program_common::insert_compare_op(OS);
|
||||
|
||||
if (require_texture_ops && emulate_pcf)
|
||||
{
|
||||
program_common::insert_compare_op_vector(OS);
|
||||
}
|
||||
|
||||
// NOTES:
|
||||
// Lowers alpha accuracy down to 2 bits, to mimic A2C banding
|
||||
// Alpha lower than the real threshold (e.g 0.25 for 4 samples) gets a randomized chance to make it to the lowest transparency state
|
||||
|
@ -509,6 +534,20 @@ namespace glsl
|
|||
" float alpha = trunc((_sample.a + epsilon) * samples) / samples;\n"
|
||||
" //_sample.a = min(_sample.a, alpha);\n" // Cannot blend A2C samples naively as they are order independent! Causes background bleeding
|
||||
" return (alpha > 0.f);\n"
|
||||
"}\n\n"
|
||||
|
||||
"vec4 linear_to_srgb(vec4 cl)\n"
|
||||
"{\n"
|
||||
" vec4 low = cl * 12.92;\n"
|
||||
" vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n"
|
||||
" bvec4 select = lessThan(cl, vec4(0.0031308));\n"
|
||||
" return clamp(mix(high, low, select), 0., 1.);\n"
|
||||
"}\n\n"
|
||||
|
||||
"float srgb_to_linear(float cs)\n"
|
||||
"{\n"
|
||||
" if (cs <= 0.04045) return cs / 12.92;\n"
|
||||
" return pow((cs + 0.055) / 1.055, 2.4);\n"
|
||||
"}\n\n";
|
||||
|
||||
if (require_depth_conversion)
|
||||
|
@ -518,7 +557,7 @@ namespace glsl
|
|||
OS <<
|
||||
"vec4 decodeLinearDepth(float depth_value)\n"
|
||||
"{\n"
|
||||
" uint value = uint(depth_value * 16777215);\n"
|
||||
" uint value = uint(depth_value * 16777215.);\n"
|
||||
" uint b = (value & 0xff);\n"
|
||||
" uint g = (value >> 8) & 0xff;\n"
|
||||
" uint r = (value >> 16) & 0xff;\n"
|
||||
|
@ -557,20 +596,24 @@ namespace glsl
|
|||
|
||||
if (require_texture_ops)
|
||||
{
|
||||
OS <<
|
||||
"vec4 linear_to_srgb(vec4 cl)\n"
|
||||
"{\n"
|
||||
" vec4 low = cl * 12.92;\n"
|
||||
" vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n"
|
||||
" bvec4 select = lessThan(cl, vec4(0.0031308));\n"
|
||||
" return clamp(mix(high, low, select), 0., 1.);\n"
|
||||
"}\n\n"
|
||||
if (emulate_pcf)
|
||||
{
|
||||
OS <<
|
||||
"vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n"
|
||||
"{\n"
|
||||
" vec4 samples = textureGather(tex, p.xy).xxxx;\n"
|
||||
" vec4 ref = clamp(p.z, 0., 1.).xxxx;\n"
|
||||
" vec4 filtered = vec4(comparison_passes(samples, ref, func));\n"
|
||||
" return filtered * dot(filtered, vec4(0.25f));\n"
|
||||
"}\n\n"
|
||||
|
||||
"float srgb_to_linear(float cs)\n"
|
||||
"{\n"
|
||||
" if (cs <= 0.04045) return cs / 12.92;\n"
|
||||
" return pow((cs + 0.055) / 1.055, 2.4);\n"
|
||||
"}\n\n"
|
||||
"vec4 shadowCompareProj(sampler2D tex, vec4 p, uint func)\n"
|
||||
"{\n"
|
||||
" return shadowCompare(tex, p.xyz / p.w, func);\n"
|
||||
"}\n\n";
|
||||
}
|
||||
|
||||
OS <<
|
||||
|
||||
#ifdef __APPLE__
|
||||
"vec4 remap_vector(vec4 rgba, uint remap_bits)\n"
|
||||
|
@ -592,7 +635,7 @@ namespace glsl
|
|||
" uint remap_bits = (control_bits >> 16) & 0xFFFF;\n"
|
||||
" if (remap_bits != 0x8D5) rgba = remap_vector(rgba, remap_bits);\n\n"
|
||||
#endif
|
||||
" if ((control_bits & 0xFFFF) == 0) return rgba;\n\n"
|
||||
" if ((control_bits & 0xFF) == 0) return rgba;\n\n"
|
||||
" if ((control_bits & 0x10) > 0)\n"
|
||||
" {\n"
|
||||
" //Alphakill\n"
|
||||
|
@ -626,10 +669,22 @@ namespace glsl
|
|||
"#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].xy, dpdx, dpdy), floatBitsToUint(texture_parameters[index].w))\n"
|
||||
"#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.)), floatBitsToUint(texture_parameters[index].w))\n"
|
||||
|
||||
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n"
|
||||
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.))\n"
|
||||
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.))\n"
|
||||
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n";
|
||||
|
||||
if (emulate_pcf)
|
||||
{
|
||||
OS <<
|
||||
"#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n"
|
||||
"#define TEX2D_SHADOWPROJ(index, coord4) shadowCompareProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
OS <<
|
||||
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.))\n"
|
||||
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].xy, 1., 1.))\n";
|
||||
}
|
||||
|
||||
OS <<
|
||||
"#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), floatBitsToUint(texture_parameters[index].w))\n"
|
||||
"#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), floatBitsToUint(texture_parameters[index].w))\n"
|
||||
"#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), floatBitsToUint(texture_parameters[index].w))\n"
|
||||
|
|
|
@ -42,7 +42,7 @@ D3D12_SAMPLER_DESC get_sampler_desc(const rsx::fragment_texture &texture)
|
|||
samplerDesc.AddressU = get_texture_wrap_mode(texture.wrap_s());
|
||||
samplerDesc.AddressV = get_texture_wrap_mode(texture.wrap_t());
|
||||
samplerDesc.AddressW = get_texture_wrap_mode(texture.wrap_r());
|
||||
samplerDesc.ComparisonFunc = get_sampler_compare_func[texture.zfunc()];
|
||||
samplerDesc.ComparisonFunc = get_sampler_compare_func[static_cast<u8>(texture.zfunc())];
|
||||
samplerDesc.MaxAnisotropy = get_texture_max_aniso(texture.max_aniso());
|
||||
samplerDesc.MipLODBias = texture.bias();
|
||||
samplerDesc.BorderColor[0] = (FLOAT)texture.border_color();
|
||||
|
|
|
@ -196,7 +196,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
|||
|
||||
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||
{
|
||||
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
|
||||
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
|
||||
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
|
||||
}
|
||||
|
||||
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include "stdafx.h"
|
||||
#include "stdafx.h"
|
||||
#include "Emu/Memory/vm.h"
|
||||
#include "RSXThread.h"
|
||||
#include "RSXTexture.h"
|
||||
|
@ -93,16 +93,16 @@ namespace rsx
|
|||
return rsx::to_texture_wrap_mode((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 16) & 0xf);
|
||||
}
|
||||
|
||||
// Returns the depth comparison function configured for this texture unit.
// The value is the 4-bit field at bits 28..31 of this unit's
// NV4097_SET_TEXTURE_ADDRESS register (register index advances by 8 per
// m_index), cast directly to rsx::comparison_function without range checking.
rsx::comparison_function fragment_texture::zfunc() const
|
||||
{
|
||||
return static_cast<rsx::comparison_function>((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf);
|
||||
}
|
||||
|
||||
// Returns the raw 4-bit field at bits 12..15 of this unit's
// NV4097_SET_TEXTURE_ADDRESS register (register index advances by 8 per m_index).
// NOTE(review): the meaning of the individual values is not visible here —
// confirm against the register definition before interpreting them.
u8 fragment_texture::unsigned_remap() const
|
||||
{
|
||||
return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf);
|
||||
}
|
||||
|
||||
u8 fragment_texture::zfunc() const
|
||||
{
|
||||
return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf);
|
||||
}
|
||||
|
||||
u8 fragment_texture::gamma() const
|
||||
{
|
||||
return ((registers[NV4097_SET_TEXTURE_ADDRESS + (m_index * 8)] >> 20) & 0xf);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#pragma once
|
||||
#pragma once
|
||||
#include "GCM.h"
|
||||
|
||||
namespace rsx
|
||||
|
@ -51,8 +51,8 @@ namespace rsx
|
|||
rsx::texture_wrap_mode wrap_s() const;
|
||||
rsx::texture_wrap_mode wrap_t() const;
|
||||
rsx::texture_wrap_mode wrap_r() const;
|
||||
rsx::comparison_function zfunc() const;
|
||||
u8 unsigned_remap() const;
|
||||
u8 zfunc() const;
|
||||
u8 gamma() const;
|
||||
u8 aniso_bias() const;
|
||||
u8 signed_remap() const;
|
||||
|
|
|
@ -1555,11 +1555,14 @@ namespace rsx
|
|||
case CELL_GCM_TEXTURE_DEPTH24_D8:
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
|
||||
{
|
||||
const auto compare_mode = (rsx::comparison_function)tex.zfunc();
|
||||
const auto compare_mode = tex.zfunc();
|
||||
if (result.textures_alpha_kill[i] == 0 &&
|
||||
compare_mode < rsx::comparison_function::always &&
|
||||
compare_mode > rsx::comparison_function::never)
|
||||
{
|
||||
result.shadow_textures |= (1 << i);
|
||||
texture_control |= u32(tex.zfunc()) << 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -1652,7 +1655,7 @@ namespace rsx
|
|||
if (tex.alpha_kill_enabled())
|
||||
{
|
||||
//alphakill can be ignored unless a valid comparison function is set
|
||||
const rsx::comparison_function func = (rsx::comparison_function)tex.zfunc();
|
||||
const auto func = tex.zfunc();
|
||||
if (func < rsx::comparison_function::always && func > rsx::comparison_function::never)
|
||||
{
|
||||
result.textures_alpha_kill[i] = 1;
|
||||
|
@ -1708,7 +1711,7 @@ namespace rsx
|
|||
case CELL_GCM_TEXTURE_DEPTH24_D8:
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
|
||||
{
|
||||
const auto compare_mode = (rsx::comparison_function)tex.zfunc();
|
||||
const auto compare_mode = tex.zfunc();
|
||||
if (result.textures_alpha_kill[i] == 0 &&
|
||||
compare_mode < rsx::comparison_function::always &&
|
||||
compare_mode > rsx::comparison_function::never)
|
||||
|
|
|
@ -132,7 +132,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
|||
|
||||
const auto mask = (1 << index);
|
||||
|
||||
if (m_prog.shadow_textures & mask)
|
||||
if (!device_props.emulate_depth_compare && m_prog.shadow_textures & mask)
|
||||
{
|
||||
if (m_shadow_sampled_textures & mask)
|
||||
{
|
||||
|
@ -228,7 +228,8 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
|||
|
||||
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||
{
|
||||
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
|
||||
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
|
||||
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
|
||||
}
|
||||
|
||||
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
|
@ -417,11 +418,13 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
|
|||
std::string source;
|
||||
VKFragmentDecompilerThread decompiler(source, parr, prog, size, *this);
|
||||
|
||||
const auto pdev = vk::get_current_renderer();
|
||||
if (!g_cfg.video.disable_native_float16)
|
||||
{
|
||||
decompiler.device_props.has_native_half_support = vk::get_current_renderer()->get_shader_types_support().allow_float16;
|
||||
decompiler.device_props.has_native_half_support = pdev->get_shader_types_support().allow_float16;
|
||||
}
|
||||
|
||||
decompiler.device_props.emulate_depth_compare = !pdev->get_formats_support().d24_unorm_s8;
|
||||
decompiler.Task();
|
||||
|
||||
shader.create(::glsl::program_domain::glsl_fragment_program, source);
|
||||
|
|
|
@ -1503,29 +1503,69 @@ void VKGSRender::end()
|
|||
if (rsx::method_registers.fragment_textures[i].enabled())
|
||||
{
|
||||
check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
|
||||
|
||||
*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
|
||||
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
|
||||
const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 ||
|
||||
texture_format == CELL_GCM_TEXTURE_DEPTH16_FLOAT || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT);
|
||||
VkCompareOp depth_compare_mode = compare_enabled ? vk::get_compare_func((rsx::comparison_function)rsx::method_registers.fragment_textures[i].zfunc(), true) : VK_COMPARE_OP_NEVER;
|
||||
|
||||
bool replace = !fs_sampler_handles[i];
|
||||
VkFilter min_filter;
|
||||
VkFilter min_filter, mag_filter;
|
||||
VkSamplerMipmapMode mip_mode;
|
||||
f32 min_lod = 0.f, max_lod = 0.f;
|
||||
f32 lod_bias = 0.f;
|
||||
|
||||
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
|
||||
VkBool32 compare_enabled = VK_FALSE;
|
||||
VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER;
|
||||
|
||||
if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT)
|
||||
{
|
||||
if (m_device->get_formats_support().d24_unorm_s8)
|
||||
{
|
||||
// NOTE:
|
||||
// The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3
|
||||
// In case of projected shadow lookup the result of the divide operation has its Z clamped to [0-1] before comparison
|
||||
// Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage
|
||||
// Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results
|
||||
|
||||
// NOTE2:
|
||||
// To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available
|
||||
|
||||
compare_enabled = VK_TRUE;
|
||||
depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true);
|
||||
}
|
||||
}
|
||||
|
||||
const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0;
|
||||
const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
|
||||
const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s());
|
||||
const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t());
|
||||
const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r());
|
||||
const auto mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
|
||||
const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color());
|
||||
|
||||
std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
|
||||
// Check if non-point filtering can even be used on this format
|
||||
bool can_sample_linear;
|
||||
if (LIKELY(!sampler_state->is_depth_texture))
|
||||
{
|
||||
// Most PS3-like formats can be linearly filtered without problem
|
||||
can_sample_linear = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not all GPUs support linear filtering of depth formats
|
||||
const auto vk_format = sampler_state->image_handle ? sampler_state->image_handle->image()->format() :
|
||||
vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format);
|
||||
|
||||
can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
|
||||
}
|
||||
|
||||
if (can_sample_linear)
|
||||
{
|
||||
mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
|
||||
std::tie(min_filter, mip_mode) = vk::get_min_filter_and_mip(rsx::method_registers.fragment_textures[i].min_filter());
|
||||
}
|
||||
else
|
||||
{
|
||||
mag_filter = min_filter = VK_FILTER_NEAREST;
|
||||
mip_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
}
|
||||
|
||||
if (sampler_state->upload_context == rsx::texture_upload_context::shader_read &&
|
||||
rsx::method_registers.fragment_textures[i].get_exact_mipmap_count() > 1)
|
||||
|
@ -1576,6 +1616,7 @@ void VKGSRender::end()
|
|||
|
||||
if (rsx::method_registers.vertex_textures[i].enabled())
|
||||
{
|
||||
check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
|
||||
*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
|
||||
|
||||
bool replace = !vs_sampler_handles[i];
|
||||
|
|
|
@ -69,8 +69,9 @@ namespace vk
|
|||
|
||||
memory_type_mapping get_memory_mapping(const vk::physical_device& dev)
|
||||
{
|
||||
VkPhysicalDevice pdev = dev;
|
||||
VkPhysicalDeviceMemoryProperties memory_properties;
|
||||
vkGetPhysicalDeviceMemoryProperties((VkPhysicalDevice&)dev, &memory_properties);
|
||||
vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);
|
||||
|
||||
memory_type_mapping result;
|
||||
result.device_local = VK_MAX_MEMORY_TYPES;
|
||||
|
|
|
@ -404,7 +404,8 @@ namespace vk
|
|||
|
||||
class physical_device
|
||||
{
|
||||
VkPhysicalDevice dev = nullptr;
|
||||
VkInstance parent = VK_NULL_HANDLE;
|
||||
VkPhysicalDevice dev = VK_NULL_HANDLE;
|
||||
VkPhysicalDeviceProperties props;
|
||||
VkPhysicalDeviceMemoryProperties memory_properties;
|
||||
std::vector<VkQueueFamilyProperties> queue_props;
|
||||
|
@ -414,9 +415,10 @@ namespace vk
|
|||
physical_device() {}
|
||||
~physical_device() {}
|
||||
|
||||
void set_device(VkPhysicalDevice pdev)
|
||||
void create(VkInstance context, VkPhysicalDevice pdev)
|
||||
{
|
||||
dev = pdev;
|
||||
parent = context;
|
||||
vkGetPhysicalDeviceProperties(pdev, &props);
|
||||
vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);
|
||||
|
||||
|
@ -514,6 +516,11 @@ namespace vk
|
|||
{
|
||||
return dev;
|
||||
}
|
||||
|
||||
// Implicit conversion to the VkInstance stored in `parent` (assigned from the
// `context` argument of create()). This lets a physical_device be passed where
// an instance handle is expected, e.g. the vkGetInstanceProcAddr(*pgpu, ...)
// call in get_physical_device_features.
operator VkInstance() const
|
||||
{
|
||||
return parent;
|
||||
}
|
||||
};
|
||||
|
||||
class supported_extensions
|
||||
|
@ -565,6 +572,7 @@ namespace vk
|
|||
{
|
||||
physical_device *pgpu = nullptr;
|
||||
memory_type_mapping memory_map{};
|
||||
std::unordered_map<VkFormat, VkFormatProperties> m_format_properties;
|
||||
gpu_formats_support m_formats_support{};
|
||||
gpu_shader_types_support m_shader_types_support{};
|
||||
std::unique_ptr<mem_allocator_base> m_allocator;
|
||||
|
@ -572,27 +580,31 @@ namespace vk
|
|||
|
||||
void get_physical_device_features(VkPhysicalDeviceFeatures& features)
|
||||
{
|
||||
if (!vkGetPhysicalDeviceFeatures2)
|
||||
supported_extensions instance_extensions(supported_extensions::instance);
|
||||
|
||||
if (!instance_extensions.is_supported("VK_KHR_get_physical_device_properties2"))
|
||||
{
|
||||
vkGetPhysicalDeviceFeatures(*pgpu, &features);
|
||||
}
|
||||
else
|
||||
{
|
||||
supported_extensions extension_support(supported_extensions::device, nullptr, pgpu);
|
||||
supported_extensions device_extensions(supported_extensions::device, nullptr, pgpu);
|
||||
|
||||
VkPhysicalDeviceFeatures2 features2;
|
||||
VkPhysicalDeviceFeatures2KHR features2;
|
||||
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||
features2.pNext = nullptr;
|
||||
|
||||
VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
|
||||
|
||||
if (extension_support.is_supported("VK_KHR_shader_float16_int8"))
|
||||
if (device_extensions.is_supported("VK_KHR_shader_float16_int8"))
|
||||
{
|
||||
shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
|
||||
features2.pNext = &shader_support_info;
|
||||
}
|
||||
|
||||
vkGetPhysicalDeviceFeatures2(*pgpu, &features2);
|
||||
auto getPhysicalDeviceFeatures2KHR = (PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr(*pgpu, "vkGetPhysicalDeviceFeatures2KHR");
|
||||
verify("vkGetInstanceProcAddress failed to find entry point!" HERE), getPhysicalDeviceFeatures2KHR;
|
||||
getPhysicalDeviceFeatures2KHR(*pgpu, &features2);
|
||||
|
||||
m_shader_types_support.allow_float16 = !!shader_support_info.shaderFloat16;
|
||||
m_shader_types_support.allow_int8 = !!shader_support_info.shaderInt8;
|
||||
|
@ -696,6 +708,19 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the VkFormatProperties for `format`, memoized in m_format_properties.
// On a cache hit the stored entry is returned; on a miss a new map entry is
// created, filled by vkGetPhysicalDeviceFormatProperties on *pgpu, and returned.
// The result is returned by value, so callers receive a copy of the cached entry.
const VkFormatProperties get_format_properties(VkFormat format)
|
||||
{
|
||||
auto found = m_format_properties.find(format);
|
||||
if (found != m_format_properties.end())
|
||||
{
|
||||
return found->second;
|
||||
}
|
||||
|
||||
// Cache miss: operator[] default-constructs the entry, which is then filled in place.
auto& props = m_format_properties[format];
|
||||
vkGetPhysicalDeviceFormatProperties(*pgpu, format, &props);
|
||||
return props;
|
||||
}
|
||||
|
||||
bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32 *type_index) const
|
||||
{
|
||||
VkPhysicalDeviceMemoryProperties mem_infos = pgpu->get_memory_properties();
|
||||
|
@ -2266,6 +2291,11 @@ public:
|
|||
{
|
||||
extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (support.is_supported("VK_KHR_get_physical_device_properties2"))
|
||||
{
|
||||
extensions.push_back("VK_KHR_get_physical_device_properties2");
|
||||
}
|
||||
#ifdef _WIN32
|
||||
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
|
||||
#elif defined(__APPLE__)
|
||||
|
@ -2357,7 +2387,7 @@ public:
|
|||
CHECK_RESULT(vkEnumeratePhysicalDevices(m_instance, &num_gpus, pdevs.data()));
|
||||
|
||||
for (u32 i = 0; i < num_gpus; ++i)
|
||||
gpus[i].set_device(pdevs[i]);
|
||||
gpus[i].create(m_instance, pdevs[i]);
|
||||
}
|
||||
|
||||
return gpus;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue