rsx: Add support for delayed shader discard.

- Noticed a glitch on AMD hardware and Windows drivers where discard seems to affect entire 4x4 cells.
- Dead fragments (those outside the primitive boundary) could trigger their discards because they do not have proper access to variables.
- This introduces dead fragments along triangle edges, causing a diagonal line pattern across the screen that is very annoying.
This commit is contained in:
kd-11 2019-10-14 01:24:04 +03:00 committed by kd-11
parent 901942f24a
commit 0b2f9f0f17
7 changed files with 84 additions and 49 deletions

View file

@ -1184,7 +1184,7 @@ std::string FragmentProgramDecompiler::Decompile()
case RSX_FP_OPCODE_NOP: break; case RSX_FP_OPCODE_NOP: break;
case RSX_FP_OPCODE_KIL: case RSX_FP_OPCODE_KIL:
properties.has_discard_op = true; properties.has_discard_op = true;
AddFlowOp("discard"); AddFlowOp("_kill()");
break; break;
default: default:

View file

@ -429,26 +429,39 @@ namespace glsl
"}\n\n"; "}\n\n";
} }
static void insert_rop(std::ostream& OS, bool _32_bit_exports, bool native_half_support, bool emulate_coverage_tests) static void insert_rop(std::ostream& OS, const shader_properties& props)
{ {
const std::string reg0 = _32_bit_exports ? "r0" : "h0"; const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
const std::string reg1 = _32_bit_exports ? "r2" : "h4"; const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
const std::string reg2 = _32_bit_exports ? "r3" : "h6"; const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
const std::string reg3 = _32_bit_exports ? "r4" : "h8"; const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
//TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here //TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
if (props.disable_early_discard)
{
OS <<
" if (_fragment_discard)\n"
" {\n"
" discard;\n"
" }\n"
" else if ((rop_control & 0xFF) != 0)\n";
}
else
{
OS << " if ((rop_control & 0xFF) != 0)\n";
}
OS << OS <<
" if ((rop_control & 0xFF) != 0)\n"
" {\n" " {\n"
" bool alpha_test = (rop_control & 0x1) > 0;\n" " bool alpha_test = (rop_control & 0x1) > 0;\n"
" uint alpha_func = ((rop_control >> 16) & 0x7);\n"; " uint alpha_func = ((rop_control >> 16) & 0x7);\n";
if (!_32_bit_exports) if (!props.fp32_outputs)
{ {
OS << " bool srgb_convert = (rop_control & 0x2) > 0;\n\n"; OS << " bool srgb_convert = (rop_control & 0x2) > 0;\n\n";
} }
if (emulate_coverage_tests) if (props.emulate_coverage_tests)
{ {
OS << " bool a2c_enabled = (rop_control & 0x10) > 0;\n"; OS << " bool a2c_enabled = (rop_control & 0x10) > 0;\n";
} }
@ -459,7 +472,7 @@ namespace glsl
" discard;\n" " discard;\n"
" }\n"; " }\n";
if (emulate_coverage_tests) if (props.emulate_coverage_tests)
{ {
OS << OS <<
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n" " else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
@ -468,10 +481,10 @@ namespace glsl
" }\n"; " }\n";
} }
if (!_32_bit_exports) if (!props.fp32_outputs)
{ {
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags // Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
if (native_half_support) if (props.supports_native_fp16)
{ {
OS << OS <<
" else if (srgb_convert)\n" " else if (srgb_convert)\n"
@ -510,6 +523,21 @@ namespace glsl
OS << "#define _saturate(x) clamp(x, 0., 1.)\n"; OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n"; OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
if (props.domain == glsl::program_domain::glsl_fragment_program)
{
OS << "// Workaround for broken early discard in some drivers\n";
if (props.disable_early_discard)
{
OS << "bool _fragment_discard = false;\n";
OS << "#define _kill() _fragment_discard = true\n\n";
}
else
{
OS << "#define _kill() discard\n\n";
}
}
if (props.require_lit_emulation) if (props.require_lit_emulation)
{ {
OS << OS <<
@ -684,7 +712,7 @@ namespace glsl
" // Alphakill\n" " // Alphakill\n"
" if (rgba.a < 0.000001)\n" " if (rgba.a < 0.000001)\n"
" {\n" " {\n"
" discard;\n" " _kill();\n"
" return rgba;\n" " return rgba;\n"
" }\n" " }\n"
" }\n" " }\n"

View file

@ -30,5 +30,7 @@ namespace glsl
bool emulate_coverage_tests; bool emulate_coverage_tests;
bool emulate_shadow_compare; bool emulate_shadow_compare;
bool low_precision_tests; bool low_precision_tests;
bool disable_early_discard;
bool supports_native_fp16;
}; };
}; };

View file

@ -197,19 +197,20 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{ {
glsl::shader_properties properties2; m_shader_props.domain = glsl::glsl_fragment_program;
properties2.domain = glsl::glsl_fragment_program; m_shader_props.require_lit_emulation = properties.has_lit_op;
properties2.require_lit_emulation = properties.has_lit_op; m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_depth_conversion = m_prog.redirected_textures != 0; m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
properties2.require_wpos = !!(properties.in_register_mask & in_wpos); m_shader_props.require_texture_ops = properties.has_tex_op;
properties2.require_texture_ops = properties.has_tex_op; m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
properties2.require_shadow_ops = m_prog.shadow_textures != 0; m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
properties2.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none; m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare; m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA; m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
glsl::insert_glsl_legacy_function(OS, properties2); glsl::insert_glsl_legacy_function(OS, m_shader_props);
} }
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -307,11 +308,7 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "\n" << " fs_main();\n\n"; OS << "\n" << " fs_main();\n\n";
glsl::insert_rop( glsl::insert_rop(OS, m_shader_props);
OS,
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
device_props.has_native_half_support,
g_cfg.video.antialiasing_level == msaa_level::none);
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{ {

View file

@ -1,11 +1,19 @@
#pragma once #pragma once
#include "../Common/FragmentProgramDecompiler.h" #include "../Common/FragmentProgramDecompiler.h"
#include "../Common/GLSLTypes.h"
#include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXFragmentProgram.h"
namespace glsl
{
struct shader_properties;
}
struct GLFragmentDecompilerThread : public FragmentProgramDecompiler struct GLFragmentDecompilerThread : public FragmentProgramDecompiler
{ {
std::string& m_shader; std::string& m_shader;
ParamArray& m_parrDummy; ParamArray& m_parrDummy;
glsl::shader_properties m_shader_props{};
public: public:
GLFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size) GLFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size)
: FragmentProgramDecompiler(prog, size) : FragmentProgramDecompiler(prog, size)

View file

@ -228,19 +228,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{ {
glsl::shader_properties properties2; m_shader_props.domain = glsl::glsl_fragment_program;
properties2.domain = glsl::glsl_fragment_program; m_shader_props.require_lit_emulation = properties.has_lit_op;
properties2.require_lit_emulation = properties.has_lit_op; m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_depth_conversion = m_prog.redirected_textures != 0; m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
properties2.require_wpos = !!(properties.in_register_mask & in_wpos); m_shader_props.require_texture_ops = properties.has_tex_op;
properties2.require_texture_ops = properties.has_tex_op; m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
properties2.require_shadow_ops = m_prog.shadow_textures != 0; m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
properties2.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none; m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare; m_shader_props.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
properties2.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA; m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
glsl::insert_glsl_legacy_function(OS, properties2); glsl::insert_glsl_legacy_function(OS, m_shader_props);
} }
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -338,11 +339,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "\n" << " fs_main();\n\n"; OS << "\n" << " fs_main();\n\n";
glsl::insert_rop( glsl::insert_rop(OS, m_shader_props);
OS,
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
device_props.has_native_half_support,
g_cfg.video.antialiasing_level == msaa_level::none);
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{ {

View file

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "../Common/FragmentProgramDecompiler.h" #include "../Common/FragmentProgramDecompiler.h"
#include "../Common/GLSLTypes.h"
#include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXFragmentProgram.h"
#include "VulkanAPI.h" #include "VulkanAPI.h"
#include "VKHelpers.h" #include "VKHelpers.h"
@ -10,6 +11,8 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler
ParamArray& m_parrDummy; ParamArray& m_parrDummy;
std::vector<vk::glsl::program_input> inputs; std::vector<vk::glsl::program_input> inputs;
class VKFragmentProgram *vk_prog; class VKFragmentProgram *vk_prog;
glsl::shader_properties m_shader_props{};
public: public:
VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst)
: FragmentProgramDecompiler(prog, size) : FragmentProgramDecompiler(prog, size)