rsx: Compensate for nvidia's low precision attribute interpolation

- The hardware generates inaccurate values when doing perspective-correct
  interpolation of vertex output attributes, which makes the comparison
  (a == b) fail even when both operands are the same fixed constant value.
- To work around this issue, increase the equality tolerance used by
  comparisons in fragment shaders, on NVIDIA cards only.
- Teepo fix
This commit is contained in:
kd-11 2019-04-16 13:50:39 +03:00 committed by kd-11
parent 463b1b220d
commit 60f3059d22
7 changed files with 120 additions and 37 deletions

View file

@ -733,7 +733,7 @@ bool FragmentProgramDecompiler::handle_sct_scb(u32 opcode)
case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true; case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true;
case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true; case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true;
case RSX_FP_OPCODE_LIT: case RSX_FP_OPCODE_LIT:
SetDst("lit_legacy($0)"); SetDst("_builtin_lit($0)");
properties.has_lit_op = true; properties.has_lit_op = true;
return true; return true;
case RSX_FP_OPCODE_LIF: SetDst("$Ty(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)", OPFLAGS::skip_type_cast); return true; case RSX_FP_OPCODE_LIF: SetDst("$Ty(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)", OPFLAGS::skip_type_cast); return true;

View file

@ -5,7 +5,37 @@
namespace program_common namespace program_common
{ {
static void insert_compare_op(std::ostream& OS) static void insert_compare_op(std::ostream& OS, bool low_precision)
{
if (low_precision)
{
OS <<
"int compare(float a, float b)\n"
"{\n"
" if (abs(a - b) < 0.000001) return 2;\n"
" return (a > b)? 4 : 1;\n"
"}\n\n"
"bool comparison_passes(float a, float b, uint func)\n"
"{\n"
" if (func == 0) return false; // never\n"
" if (func == 7) return true; // always\n\n"
" int op = compare(a, b);\n"
" switch (func)\n"
" {\n"
" case 1: return op == 1; // less\n"
" case 2: return op == 2; // equal\n"
" case 3: return op <= 2; // lequal\n"
" case 4: return op == 4; // greater\n"
" case 5: return op != 2; // nequal\n"
" case 6: return (op == 4 || op == 2); // gequal\n"
" }\n\n"
" return false; // unreachable\n"
"}\n\n";
}
else
{ {
OS << OS <<
"bool comparison_passes(float a, float b, uint func)\n" "bool comparison_passes(float a, float b, uint func)\n"
@ -24,6 +54,7 @@ namespace program_common
" }\n" " }\n"
"}\n\n"; "}\n\n";
} }
}
static void insert_compare_op_vector(std::ostream& OS) static void insert_compare_op_vector(std::ostream& OS)
{ {
@ -469,13 +500,27 @@ namespace glsl
" ocol3 = " << reg3 << ";\n\n"; " ocol3 = " << reg3 << ";\n\n";
} }
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true, bool emulate_pcf = false) struct shader_properties
{
glsl::program_domain domain;
// Applicable in vertex stage
bool require_lit_emulation;
// Only relevant for fragment programs
bool require_wpos;
bool require_depth_conversion;
bool require_texture_ops;
bool emulate_shadow_compare;
bool low_precision_tests;
};
static void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
{ {
OS << "#define _select mix\n"; OS << "#define _select mix\n";
OS << "#define _saturate(x) clamp(x, 0., 1.)\n"; OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n"; OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
if (require_lit_emulation) if (props.require_lit_emulation)
{ {
OS << OS <<
"vec4 lit_legacy(vec4 val)" "vec4 lit_legacy(vec4 val)"
@ -492,7 +537,7 @@ namespace glsl
"}\n\n"; "}\n\n";
} }
if (domain == glsl::program_domain::glsl_vertex_program) if (props.domain == glsl::program_domain::glsl_vertex_program)
{ {
OS << OS <<
"vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n" "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n"
@ -512,9 +557,9 @@ namespace glsl
return; return;
} }
program_common::insert_compare_op(OS); program_common::insert_compare_op(OS, props.low_precision_tests);
if (require_texture_ops && emulate_pcf) if (props.require_texture_ops && props.emulate_shadow_compare)
{ {
program_common::insert_compare_op_vector(OS); program_common::insert_compare_op_vector(OS);
} }
@ -550,7 +595,7 @@ namespace glsl
" return pow((cs + 0.055) / 1.055, 2.4);\n" " return pow((cs + 0.055) / 1.055, 2.4);\n"
"}\n\n"; "}\n\n";
if (require_depth_conversion) if (props.require_depth_conversion)
{ {
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
//The A component (Z) is useless (should contain stencil8 or just 1) //The A component (Z) is useless (should contain stencil8 or just 1)
@ -594,9 +639,9 @@ namespace glsl
"}\n\n"; "}\n\n";
} }
if (require_texture_ops) if (props.require_texture_ops)
{ {
if (emulate_pcf) if (props.emulate_shadow_compare)
{ {
OS << OS <<
"vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n" "vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n"
@ -671,7 +716,7 @@ namespace glsl
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n"; "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n";
if (emulate_pcf) if (props.emulate_shadow_compare)
{ {
OS << OS <<
"#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n" "#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n"
@ -692,7 +737,7 @@ namespace glsl
"#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), floatBitsToUint(texture_parameters[index].w))\n\n"; "#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), floatBitsToUint(texture_parameters[index].w))\n\n";
} }
if (require_wpos) if (props.require_wpos)
{ {
OS << OS <<
"vec4 get_wpos()\n" "vec4 get_wpos()\n"

View file

@ -132,7 +132,7 @@ void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program)
if (!is_fragment_program) if (!is_fragment_program)
return; return;
program_common::insert_compare_op(OS); program_common::insert_compare_op(OS, false);
OS << "uint packSnorm2x16(float2 val)"; OS << "uint packSnorm2x16(float2 val)";
OS << "{\n"; OS << "{\n";

View file

@ -196,8 +196,16 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{ {
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, glsl::shader_properties properties2;
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare); properties2.domain = glsl::glsl_fragment_program;
properties2.require_lit_emulation = properties.has_lit_op;
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_wpos = properties.has_wpos_input;
properties2.require_texture_ops = properties.has_tex_op;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
glsl::insert_glsl_legacy_function(OS, properties2);
} }
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)

View file

@ -154,8 +154,20 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{ {
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op); const auto& dev_caps = gl::get_driver_caps();
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false);
glsl::shader_properties properties2;
properties2.domain = glsl::glsl_vertex_program;
properties2.require_lit_emulation = properties.has_lit_op;
// Unused
properties2.require_depth_conversion = false;
properties2.require_wpos = false;
properties2.require_texture_ops = false;
properties2.emulate_shadow_compare = false;
properties2.low_precision_tests = false;
insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false);
std::string parameters = ""; std::string parameters = "";
for (int i = 0; i < 16; ++i) for (int i = 0; i < 16; ++i)
@ -306,7 +318,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
//SEE Naruto: UNS //SEE Naruto: UNS
//NOTE: On GPUs, poor fp32 precision means dividing z by w, then multiplying by w again gives slightly incorrect results //NOTE: On GPUs, poor fp32 precision means dividing z by w, then multiplying by w again gives slightly incorrect results
//This equation is simplified algebraically to an addition and subreaction which gives more accurate results (Fixes flickering skybox in Dark Souls 2) //This equation is simplified algebraically to an addition and subtraction which gives more accurate results (Fixes flickering skybox in Dark Souls 2)
//OS << " float ndc_z = gl_Position.z / gl_Position.w;\n"; //OS << " float ndc_z = gl_Position.z / gl_Position.w;\n";
//OS << " ndc_z = (ndc_z * 2.) - 1.;\n"; //OS << " ndc_z = (ndc_z * 2.) - 1.;\n";
//OS << " gl_Position.z = ndc_z * gl_Position.w;\n"; //OS << " gl_Position.z = ndc_z * gl_Position.w;\n";

View file

@ -228,8 +228,16 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{ {
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, glsl::shader_properties properties2;
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare); properties2.domain = glsl::glsl_fragment_program;
properties2.require_lit_emulation = properties.has_lit_op;
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_wpos = properties.has_wpos_input;
properties2.require_texture_ops = properties.has_tex_op;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
properties2.low_precision_tests = vk::get_current_renderer()->gpu().get_driver_vendor() == vk::driver_vendor::NVIDIA;
glsl::insert_glsl_legacy_function(OS, properties2);
} }
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)

View file

@ -194,7 +194,17 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{ {
glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op); glsl::shader_properties properties2;
properties2.domain = glsl::glsl_vertex_program;
properties2.require_lit_emulation = properties.has_lit_op;
// Unused
properties2.require_depth_conversion = false;
properties2.require_wpos = false;
properties2.require_texture_ops = false;
properties2.emulate_shadow_compare = false;
properties2.low_precision_tests = false;
glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);
std::string parameters = ""; std::string parameters = "";