mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-13 02:08:49 +12:00
rsx: Add support for delayed shader discard.
- Noticed a glitch on AMD hardware and Windows drivers where discard seems to affect entire 4x4 cells.
- Dead fragments (outside the primitive boundary) could have their discards triggered, as they do not have proper access to variables.
- This introduces dead fragments along triangle edges, causing a very annoying diagonal line pattern across the screen.
This commit is contained in:
parent
901942f24a
commit
0b2f9f0f17
7 changed files with 84 additions and 49 deletions
|
@ -1184,7 +1184,7 @@ std::string FragmentProgramDecompiler::Decompile()
|
||||||
case RSX_FP_OPCODE_NOP: break;
|
case RSX_FP_OPCODE_NOP: break;
|
||||||
case RSX_FP_OPCODE_KIL:
|
case RSX_FP_OPCODE_KIL:
|
||||||
properties.has_discard_op = true;
|
properties.has_discard_op = true;
|
||||||
AddFlowOp("discard");
|
AddFlowOp("_kill()");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -429,26 +429,39 @@ namespace glsl
|
||||||
"}\n\n";
|
"}\n\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
static void insert_rop(std::ostream& OS, bool _32_bit_exports, bool native_half_support, bool emulate_coverage_tests)
|
static void insert_rop(std::ostream& OS, const shader_properties& props)
|
||||||
{
|
{
|
||||||
const std::string reg0 = _32_bit_exports ? "r0" : "h0";
|
const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
|
||||||
const std::string reg1 = _32_bit_exports ? "r2" : "h4";
|
const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
|
||||||
const std::string reg2 = _32_bit_exports ? "r3" : "h6";
|
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
|
||||||
const std::string reg3 = _32_bit_exports ? "r4" : "h8";
|
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
|
||||||
|
|
||||||
//TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
|
//TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
|
||||||
|
if (props.disable_early_discard)
|
||||||
|
{
|
||||||
|
OS <<
|
||||||
|
" if (_fragment_discard)\n"
|
||||||
|
" {\n"
|
||||||
|
" discard;\n"
|
||||||
|
" }\n"
|
||||||
|
" else if ((rop_control & 0xFF) != 0)\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OS << " if ((rop_control & 0xFF) != 0)\n";
|
||||||
|
}
|
||||||
|
|
||||||
OS <<
|
OS <<
|
||||||
" if ((rop_control & 0xFF) != 0)\n"
|
|
||||||
" {\n"
|
" {\n"
|
||||||
" bool alpha_test = (rop_control & 0x1) > 0;\n"
|
" bool alpha_test = (rop_control & 0x1) > 0;\n"
|
||||||
" uint alpha_func = ((rop_control >> 16) & 0x7);\n";
|
" uint alpha_func = ((rop_control >> 16) & 0x7);\n";
|
||||||
|
|
||||||
if (!_32_bit_exports)
|
if (!props.fp32_outputs)
|
||||||
{
|
{
|
||||||
OS << " bool srgb_convert = (rop_control & 0x2) > 0;\n\n";
|
OS << " bool srgb_convert = (rop_control & 0x2) > 0;\n\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (emulate_coverage_tests)
|
if (props.emulate_coverage_tests)
|
||||||
{
|
{
|
||||||
OS << " bool a2c_enabled = (rop_control & 0x10) > 0;\n";
|
OS << " bool a2c_enabled = (rop_control & 0x10) > 0;\n";
|
||||||
}
|
}
|
||||||
|
@ -459,7 +472,7 @@ namespace glsl
|
||||||
" discard;\n"
|
" discard;\n"
|
||||||
" }\n";
|
" }\n";
|
||||||
|
|
||||||
if (emulate_coverage_tests)
|
if (props.emulate_coverage_tests)
|
||||||
{
|
{
|
||||||
OS <<
|
OS <<
|
||||||
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
|
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
|
||||||
|
@ -468,10 +481,10 @@ namespace glsl
|
||||||
" }\n";
|
" }\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!_32_bit_exports)
|
if (!props.fp32_outputs)
|
||||||
{
|
{
|
||||||
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
|
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
|
||||||
if (native_half_support)
|
if (props.supports_native_fp16)
|
||||||
{
|
{
|
||||||
OS <<
|
OS <<
|
||||||
" else if (srgb_convert)\n"
|
" else if (srgb_convert)\n"
|
||||||
|
@ -510,6 +523,21 @@ namespace glsl
|
||||||
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
|
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
|
||||||
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
|
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
|
||||||
|
|
||||||
|
if (props.domain == glsl::program_domain::glsl_fragment_program)
|
||||||
|
{
|
||||||
|
OS << "// Workaround for broken early discard in some drivers\n";
|
||||||
|
|
||||||
|
if (props.disable_early_discard)
|
||||||
|
{
|
||||||
|
OS << "bool _fragment_discard = false;\n";
|
||||||
|
OS << "#define _kill() _fragment_discard = true\n\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OS << "#define _kill() discard\n\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (props.require_lit_emulation)
|
if (props.require_lit_emulation)
|
||||||
{
|
{
|
||||||
OS <<
|
OS <<
|
||||||
|
@ -684,7 +712,7 @@ namespace glsl
|
||||||
" // Alphakill\n"
|
" // Alphakill\n"
|
||||||
" if (rgba.a < 0.000001)\n"
|
" if (rgba.a < 0.000001)\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" discard;\n"
|
" _kill();\n"
|
||||||
" return rgba;\n"
|
" return rgba;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
|
|
|
@ -30,5 +30,7 @@ namespace glsl
|
||||||
bool emulate_coverage_tests;
|
bool emulate_coverage_tests;
|
||||||
bool emulate_shadow_compare;
|
bool emulate_shadow_compare;
|
||||||
bool low_precision_tests;
|
bool low_precision_tests;
|
||||||
|
bool disable_early_discard;
|
||||||
|
bool supports_native_fp16;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
@ -197,19 +197,20 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
||||||
|
|
||||||
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||||
{
|
{
|
||||||
glsl::shader_properties properties2;
|
m_shader_props.domain = glsl::glsl_fragment_program;
|
||||||
properties2.domain = glsl::glsl_fragment_program;
|
m_shader_props.require_lit_emulation = properties.has_lit_op;
|
||||||
properties2.require_lit_emulation = properties.has_lit_op;
|
m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
||||||
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||||
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
|
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
|
||||||
properties2.require_wpos = !!(properties.in_register_mask & in_wpos);
|
m_shader_props.require_texture_ops = properties.has_tex_op;
|
||||||
properties2.require_texture_ops = properties.has_tex_op;
|
m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
|
||||||
properties2.require_shadow_ops = m_prog.shadow_textures != 0;
|
m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
|
||||||
properties2.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
|
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||||
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
|
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
|
||||||
properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
|
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
|
||||||
|
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
||||||
|
|
||||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||||
|
@ -307,11 +308,7 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
|
||||||
|
|
||||||
OS << "\n" << " fs_main();\n\n";
|
OS << "\n" << " fs_main();\n\n";
|
||||||
|
|
||||||
glsl::insert_rop(
|
glsl::insert_rop(OS, m_shader_props);
|
||||||
OS,
|
|
||||||
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
|
|
||||||
device_props.has_native_half_support,
|
|
||||||
g_cfg.video.antialiasing_level == msaa_level::none);
|
|
||||||
|
|
||||||
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
|
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,11 +1,19 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "../Common/FragmentProgramDecompiler.h"
|
#include "../Common/FragmentProgramDecompiler.h"
|
||||||
|
#include "../Common/GLSLTypes.h"
|
||||||
#include "Emu/RSX/RSXFragmentProgram.h"
|
#include "Emu/RSX/RSXFragmentProgram.h"
|
||||||
|
|
||||||
|
namespace glsl
|
||||||
|
{
|
||||||
|
struct shader_properties;
|
||||||
|
}
|
||||||
|
|
||||||
struct GLFragmentDecompilerThread : public FragmentProgramDecompiler
|
struct GLFragmentDecompilerThread : public FragmentProgramDecompiler
|
||||||
{
|
{
|
||||||
std::string& m_shader;
|
std::string& m_shader;
|
||||||
ParamArray& m_parrDummy;
|
ParamArray& m_parrDummy;
|
||||||
|
glsl::shader_properties m_shader_props{};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GLFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size)
|
GLFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size)
|
||||||
: FragmentProgramDecompiler(prog, size)
|
: FragmentProgramDecompiler(prog, size)
|
||||||
|
|
|
@ -228,19 +228,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
||||||
|
|
||||||
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||||
{
|
{
|
||||||
glsl::shader_properties properties2;
|
m_shader_props.domain = glsl::glsl_fragment_program;
|
||||||
properties2.domain = glsl::glsl_fragment_program;
|
m_shader_props.require_lit_emulation = properties.has_lit_op;
|
||||||
properties2.require_lit_emulation = properties.has_lit_op;
|
m_shader_props.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
||||||
properties2.fp32_outputs = !!(m_prog.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS);
|
m_shader_props.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||||
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
|
m_shader_props.require_wpos = !!(properties.in_register_mask & in_wpos);
|
||||||
properties2.require_wpos = !!(properties.in_register_mask & in_wpos);
|
m_shader_props.require_texture_ops = properties.has_tex_op;
|
||||||
properties2.require_texture_ops = properties.has_tex_op;
|
m_shader_props.require_shadow_ops = m_prog.shadow_textures != 0;
|
||||||
properties2.require_shadow_ops = m_prog.shadow_textures != 0;
|
m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
|
||||||
properties2.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
|
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||||
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
|
m_shader_props.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
|
||||||
properties2.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
|
m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
|
||||||
|
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
||||||
|
|
||||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||||
|
@ -338,11 +339,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
|
||||||
|
|
||||||
OS << "\n" << " fs_main();\n\n";
|
OS << "\n" << " fs_main();\n\n";
|
||||||
|
|
||||||
glsl::insert_rop(
|
glsl::insert_rop(OS, m_shader_props);
|
||||||
OS,
|
|
||||||
!!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS),
|
|
||||||
device_props.has_native_half_support,
|
|
||||||
g_cfg.video.antialiasing_level == msaa_level::none);
|
|
||||||
|
|
||||||
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
|
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "../Common/FragmentProgramDecompiler.h"
|
#include "../Common/FragmentProgramDecompiler.h"
|
||||||
|
#include "../Common/GLSLTypes.h"
|
||||||
#include "Emu/RSX/RSXFragmentProgram.h"
|
#include "Emu/RSX/RSXFragmentProgram.h"
|
||||||
#include "VulkanAPI.h"
|
#include "VulkanAPI.h"
|
||||||
#include "VKHelpers.h"
|
#include "VKHelpers.h"
|
||||||
|
@ -10,6 +11,8 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler
|
||||||
ParamArray& m_parrDummy;
|
ParamArray& m_parrDummy;
|
||||||
std::vector<vk::glsl::program_input> inputs;
|
std::vector<vk::glsl::program_input> inputs;
|
||||||
class VKFragmentProgram *vk_prog;
|
class VKFragmentProgram *vk_prog;
|
||||||
|
glsl::shader_properties m_shader_props{};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst)
|
VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst)
|
||||||
: FragmentProgramDecompiler(prog, size)
|
: FragmentProgramDecompiler(prog, size)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue