glsl/fp/vp: Avoid shader clutter

- Do not add subroutines to shaders unless they are needed
-- makes shaders easier to read, and the disassembled SPIR-V has less clutter
- glsl: Replace switch block with lookup table
This commit is contained in:
kd-11 2018-01-25 00:09:27 +03:00
parent 2e04dceaf0
commit 33bcdd476c
9 changed files with 192 additions and 123 deletions

View file

@ -40,15 +40,48 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask)
{ {
if (dst.exp_tex) if (dst.exp_tex)
{ {
//If dst.exp_tex really is _bx2 postfix, we need to unpack dynamic range //Expand [0,1] to [-1, 1]. Confirmed by Castlevania: LOS
AddCode("//exp tex flag is set"); AddCode("//exp tex flag is set");
code = "((" + code + "- 0.5) * 2.)"; code = "((" + code + "- 0.5) * 2.)";
} }
if (dst.saturate) if (dst.saturate)
{
code = saturate(code); code = saturate(code);
else }
code = ClampValue(code, dst.prec); else if (dst.prec)
{
switch (dst.opcode)
{
case RSX_FP_OPCODE_NRM:
case RSX_FP_OPCODE_MAX:
case RSX_FP_OPCODE_MIN:
case RSX_FP_OPCODE_COS:
case RSX_FP_OPCODE_SIN:
case RSX_FP_OPCODE_REFL:
case RSX_FP_OPCODE_EX2:
case RSX_FP_OPCODE_FRC:
case RSX_FP_OPCODE_LIT:
case RSX_FP_OPCODE_LIF:
case RSX_FP_OPCODE_LRP:
case RSX_FP_OPCODE_LG2:
break;
case RSX_FP_OPCODE_MOV:
//NOTE: Sometimes varying inputs from VS are out of range so do not exempt any input types, unless fp16 (Naruto UNS)
if (dst.fp16 && src0.fp16 && src0.reg_type == RSX_FP_REGISTER_TYPE_TEMP)
break;
default:
{
//fp16 precision flag on f32 register; ignore
if (dst.prec == 1 && !dst.fp16)
break;
//clamp value to allowed range
code = ClampValue(code, dst.prec);
break;
}
}
}
} }
code += (append_mask ? "$m" : ""); code += (append_mask ? "$m" : "");
@ -385,7 +418,10 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
auto &reg = temp_registers[src.tmp_reg_index]; auto &reg = temp_registers[src.tmp_reg_index];
if (reg.requires_gather(xy_read, zw_read)) if (reg.requires_gather(xy_read, zw_read))
{
properties.has_gather_op = true;
AddCode(reg.gather_r()); AddCode(reg.gather_r());
}
} }
} }
@ -407,7 +443,10 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
switch (dst.src_attr_reg_num) switch (dst.src_attr_reg_num)
{ {
case 0x00: ret += reg_table[0]; break; case 0x00:
ret += reg_table[0];
properties.has_wpos_input = true;
break;
default: default:
if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0])) if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0]))
{ {
@ -478,24 +517,28 @@ std::string FragmentProgramDecompiler::BuildCode()
//Insert global function definitions //Insert global function definitions
insertGlobalFunctions(OS); insertGlobalFunctions(OS);
std::string float2 = getFloatTypeName(2); //Declare register gather/merge if needed
std::string float4 = getFloatTypeName(4); if (properties.has_gather_op)
{
std::string float2 = getFloatTypeName(2);
std::string float4 = getFloatTypeName(4);
OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n"; OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
OS << "{\n"; OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n"; OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n"; OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n"; OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n"; OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
OS << " return " << float4 << "(x, y, z, w);\n"; OS << " return " << float4 << "(x, y, z, w);\n";
OS << "}\n\n"; OS << "}\n\n";
OS << float2 << " gather(" << float4 << " _h)\n"; OS << float2 << " gather(" << float4 << " _h)\n";
OS << "{\n"; OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n"; OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n"; OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
OS << " return " << float2 << "(x, y);\n"; OS << " return " << float2 << "(x, y);\n";
OS << "}\n\n"; OS << "}\n\n";
}
insertMainStart(OS); insertMainStart(OS);
OS << main << std::endl; OS << main << std::endl;
@ -558,7 +601,10 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode)
case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true; case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true;
case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true; case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true;
case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true; case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true;
case RSX_FP_OPCODE_LIT: SetDst("lit_legacy($0)"); return true; case RSX_FP_OPCODE_LIT:
SetDst("lit_legacy($0)");
properties.has_lit_op = true;
return true;
case RSX_FP_OPCODE_LIF: SetDst(getFloatTypeName(4) + "(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); return true; case RSX_FP_OPCODE_LIF: SetDst(getFloatTypeName(4) + "(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); return true;
case RSX_FP_OPCODE_LRP: SetDst(getFloatTypeName(4) + "($2 * (1 - $0) + $1 * $0)"); return true; case RSX_FP_OPCODE_LRP: SetDst(getFloatTypeName(4) + "($2 * (1 - $0) + $1 * $0)"); return true;
case RSX_FP_OPCODE_LG2: SetDst("log2(" + NotZeroPositive("$0.x") + ").xxxx"); return true; case RSX_FP_OPCODE_LG2: SetDst("log2(" + NotZeroPositive("$0.x") + ").xxxx"); return true;

View file

@ -219,7 +219,16 @@ protected:
/** insert end of main function (return value, output copy...) /** insert end of main function (return value, output copy...)
*/ */
virtual void insertMainEnd(std::stringstream &OS) = 0; virtual void insertMainEnd(std::stringstream &OS) = 0;
public: public:
// Feature flags recorded while the fragment program is decompiled.
// They gate which optional helper code is written into the generated shader.
struct
{
// Set when RSX_FP_OPCODE_LIT is decompiled into a lit_legacy() call;
// the GL/VK backends pass it to insert_glsl_legacy_function as require_lit_emulation
bool has_lit_op = false;
// Set in GetSRC when a temp register reports requires_gather();
// BuildCode only declares the gather() overloads when this is true
bool has_gather_op = false;
// Set in GetSRC when input attribute register 0x00 (reg_table[0]) is read;
// the backends use it to decide whether get_wpos() and the local 'wpos' are emitted
bool has_wpos_input = false;
}
properties;
ParamArray m_parr; ParamArray m_parr;
FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size); FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size);
FragmentProgramDecompiler(const FragmentProgramDecompiler&) = delete; FragmentProgramDecompiler(const FragmentProgramDecompiler&) = delete;

View file

@ -281,31 +281,18 @@ namespace glsl
OS << " if (desc.attribute_size == 0)\n"; OS << " if (desc.attribute_size == 0)\n";
OS << " {\n"; OS << " {\n";
OS << " //default values\n"; OS << " //default values\n";
OS << " switch (location)\n"; OS << " const vec4 defaults[] = \n";
OS << " {\n"; OS << " { vec4(0., 0., 0., 1.), //position\n";
OS << " case 0:\n"; OS << " vec4(0.), vec4(0.), //weight, normals\n";
OS << " //position\n"; OS << " vec4(1.), //diffuse\n";
OS << " return vec4(0., 0., 0., 1.);\n"; OS << " vec4(0.), vec4(0.), //specular, fog\n";
OS << " case 1:\n"; OS << " vec4(1.), //point size\n";
OS << " case 2:\n"; OS << " vec4(0.), //in_7\n";
OS << " //weight, normals\n"; OS << " //in_tc registers\n";
OS << " return vec4(0.);\n"; OS << " vec4(0.), vec4(0.), vec4(0.), vec4(0.),\n";
OS << " case 3:\n"; OS << " vec4(0.), vec4(0.), vec4(0.), vec4(0.)\n";
OS << " //diffuse\n"; OS << " };\n";
OS << " return vec4(1.);\n"; OS << " return defaults[location];\n";
OS << " case 4:\n";
OS << " //specular\n";
OS << " return vec4(0.);\n";
OS << " case 5:\n";
OS << " //fog\n";
OS << " return vec4(0.);\n";
OS << " case 6:\n";
OS << " //point size\n";
OS << " return vec4(1.);\n";
OS << " default:\n";
OS << " //mostly just texture coordinates\n";
OS << " return vec4(0.);\n";
OS << " }\n";
OS << " }\n\n"; OS << " }\n\n";
OS << " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n"; OS << " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n";
OS << " if (desc.frequency == 0)\n"; OS << " if (desc.frequency == 0)\n";
@ -326,84 +313,95 @@ namespace glsl
OS << "}\n\n"; OS << "}\n\n";
} }
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain) static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false)
{ {
OS << "vec4 lit_legacy(vec4 val)"; if (require_lit_emulation)
OS << "{\n"; {
OS << " vec4 clamped_val = val;\n"; OS << "vec4 lit_legacy(vec4 val)";
OS << " clamped_val.x = max(val.x, 0.);\n"; OS << "{\n";
OS << " clamped_val.y = max(val.y, 0.);\n"; OS << " vec4 clamped_val = val;\n";
OS << " vec4 result;\n"; OS << " clamped_val.x = max(val.x, 0.);\n";
OS << " result.x = 1.;\n"; OS << " clamped_val.y = max(val.y, 0.);\n";
OS << " result.w = 1.;\n"; OS << " vec4 result;\n";
OS << " result.y = clamped_val.x;\n"; OS << " result.x = 1.;\n";
OS << " result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n"; OS << " result.w = 1.;\n";
OS << " return result;\n"; OS << " result.y = clamped_val.x;\n";
OS << "}\n\n"; OS << " result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n";
OS << " return result;\n";
OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n"; OS << "}\n\n";
OS << "{\n"; }
OS << " float d = pos.z / pos.w;\n";
OS << " if (d < 0.f && d >= near_plane)\n";
OS << " d = 0.f;\n"; //force clamp negative values
OS << " else if (d > 1.f && d <= far_plane)\n";
OS << " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n";
OS << " else\n";
OS << " return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found
OS << "\n";
OS << " pos.z = d * pos.w;\n";
OS << " return pos;\n";
OS << "}\n\n";
if (domain == glsl::program_domain::glsl_vertex_program) if (domain == glsl::program_domain::glsl_vertex_program)
{
OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n";
OS << "{\n";
OS << " float d = pos.z / pos.w;\n";
OS << " if (d < 0.f && d >= near_plane)\n";
OS << " d = 0.f;\n"; //force clamp negative values
OS << " else if (d > 1.f && d <= far_plane)\n";
OS << " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n";
OS << " else\n";
OS << " return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found
OS << "\n";
OS << " pos.z = d * pos.w;\n";
OS << " return pos;\n";
OS << "}\n\n";
return; return;
}
program_common::insert_compare_op(OS); program_common::insert_compare_op(OS);
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) if (require_depth_conversion)
//The A component (Z) is useless (should contain stencil8 or just 1) {
OS << "vec4 decodeLinearDepth(float depth_value)\n"; //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
OS << "{\n"; //The A component (Z) is useless (should contain stencil8 or just 1)
OS << " uint value = uint(depth_value * 16777215);\n"; OS << "vec4 decodeLinearDepth(float depth_value)\n";
OS << " uint b = (value & 0xff);\n"; OS << "{\n";
OS << " uint g = (value >> 8) & 0xff;\n"; OS << " uint value = uint(depth_value * 16777215);\n";
OS << " uint r = (value >> 16) & 0xff;\n"; OS << " uint b = (value & 0xff);\n";
OS << " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n"; OS << " uint g = (value >> 8) & 0xff;\n";
OS << "}\n\n"; OS << " uint r = (value >> 16) & 0xff;\n";
OS << " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n";
OS << "}\n\n";
OS << "float read_value(vec4 src, uint remap_index)\n"; OS << "float read_value(vec4 src, uint remap_index)\n";
OS << "{\n"; OS << "{\n";
OS << " switch (remap_index)\n"; OS << " switch (remap_index)\n";
OS << " {\n"; OS << " {\n";
OS << " case 0: return src.a;\n"; OS << " case 0: return src.a;\n";
OS << " case 1: return src.r;\n"; OS << " case 1: return src.r;\n";
OS << " case 2: return src.g;\n"; OS << " case 2: return src.g;\n";
OS << " case 3: return src.b;\n"; OS << " case 3: return src.b;\n";
OS << " }\n"; OS << " }\n";
OS << "}\n\n"; OS << "}\n\n";
OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n"; OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n";
OS << "{\n"; OS << "{\n";
OS << " vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n"; OS << " vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n";
OS << " uint remap_vector = floatBitsToUint(remap) & 0xFF;\n"; OS << " uint remap_vector = floatBitsToUint(remap) & 0xFF;\n";
OS << " if (remap_vector == 0xE4) return result;\n\n"; OS << " if (remap_vector == 0xE4) return result;\n\n";
OS << " vec4 tmp;\n"; OS << " vec4 tmp;\n";
OS << " uint remap_a = remap_vector & 0x3;\n"; OS << " uint remap_a = remap_vector & 0x3;\n";
OS << " uint remap_r = (remap_vector >> 2) & 0x3;\n"; OS << " uint remap_r = (remap_vector >> 2) & 0x3;\n";
OS << " uint remap_g = (remap_vector >> 4) & 0x3;\n"; OS << " uint remap_g = (remap_vector >> 4) & 0x3;\n";
OS << " uint remap_b = (remap_vector >> 6) & 0x3;\n"; OS << " uint remap_b = (remap_vector >> 6) & 0x3;\n";
OS << " tmp.a = read_value(result, remap_a);\n"; OS << " tmp.a = read_value(result, remap_a);\n";
OS << " tmp.r = read_value(result, remap_r);\n"; OS << " tmp.r = read_value(result, remap_r);\n";
OS << " tmp.g = read_value(result, remap_g);\n"; OS << " tmp.g = read_value(result, remap_g);\n";
OS << " tmp.b = read_value(result, remap_b);\n"; OS << " tmp.b = read_value(result, remap_b);\n";
OS << " return tmp;\n"; OS << " return tmp;\n";
OS << "}\n\n"; OS << "}\n\n";
}
OS << "vec4 get_wpos()\n"; if (require_wpos)
OS << "{\n"; {
OS << " float abs_scale = abs(wpos_scale);\n"; OS << "vec4 get_wpos()\n";
OS << " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n"; OS << "{\n";
OS << "}\n\n"; OS << " float abs_scale = abs(wpos_scale);\n";
OS << " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n";
OS << "}\n\n";
}
} }
static void insert_fog_declaration(std::ostream& OS) static void insert_fog_declaration(std::ostream& OS)

View file

@ -638,7 +638,10 @@ std::string VertexProgramDecompiler::Decompile()
case RSX_SCA_OPCODE_RSQ: SetDSTSca("1. / sqrt(" + NotZeroPositive("$s.x") +").xxxx"); break; case RSX_SCA_OPCODE_RSQ: SetDSTSca("1. / sqrt(" + NotZeroPositive("$s.x") +").xxxx"); break;
case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break; case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break;
case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break; case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break;
case RSX_SCA_OPCODE_LIT: SetDSTSca("lit_legacy($s)"); break; case RSX_SCA_OPCODE_LIT:
SetDSTSca("lit_legacy($s)");
properties.has_lit_op = true;
break;
case RSX_SCA_OPCODE_BRA: case RSX_SCA_OPCODE_BRA:
{ {
AddCode("$if ($cond) //BRA"); AddCode("$if ($cond) //BRA");

View file

@ -126,7 +126,14 @@ protected:
/** insert end of main function (return value, output copy...) /** insert end of main function (return value, output copy...)
*/ */
virtual void insertMainEnd(std::stringstream &OS) = 0; virtual void insertMainEnd(std::stringstream &OS) = 0;
public: public:
// Feature flags recorded while the vertex program is decompiled.
struct
{
// Set when RSX_SCA_OPCODE_LIT is decompiled into a lit_legacy() call;
// the GL/VK backends pass it to insert_glsl_legacy_function as require_lit_emulation
bool has_lit_op = false;
}
properties;
VertexProgramDecompiler(const RSXVertexProgram& prog); VertexProgramDecompiler(const RSXVertexProgram& prog);
std::string Decompile(); std::string Decompile();
}; };

View file

@ -194,7 +194,7 @@ namespace
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{ {
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program); glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
} }
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -248,8 +248,11 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
} }
} }
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa"))
OS << " vec4 wpos = get_wpos();\n"; OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
if (properties.has_wpos_input)
OS << " vec4 wpos = get_wpos();\n";
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{ {

View file

@ -149,7 +149,7 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{ {
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false);
std::string parameters = ""; std::string parameters = "";

View file

@ -197,7 +197,7 @@ namespace vk
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{ {
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program); glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
} }
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -251,8 +251,11 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
} }
} }
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa"))
OS << " vec4 wpos = get_wpos();\n"; OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
if (properties.has_wpos_input)
OS << " vec4 wpos = get_wpos();\n";
bool two_sided_enabled = m_prog.front_back_color_enabled && (m_prog.back_color_diffuse_output || m_prog.back_color_specular_output); bool two_sided_enabled = m_prog.front_back_color_enabled && (m_prog.back_color_diffuse_output || m_prog.back_color_specular_output);

View file

@ -185,7 +185,7 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{ {
glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);
std::string parameters = ""; std::string parameters = "";