mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-08 07:51:28 +12:00
rsx/fp: RE work on precision modifier bits
- Testing Dark Souls II (DS2) revealed clamping bits in SRC1 that were not being respected, which allowed negative values to reach the framebuffer
This commit is contained in:
parent
bbb3bdf008
commit
a8c0dd649e
3 changed files with 18 additions and 13 deletions
|
@ -38,10 +38,17 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask)
|
||||||
|
|
||||||
if (!dst.no_dest)
|
if (!dst.no_dest)
|
||||||
{
|
{
|
||||||
code = NoOverflow(code);
|
if (dst.exp_tex)
|
||||||
|
{
|
||||||
|
//If dst.exp_tex really is _bx2 postfix, we need to unpack dynamic range
|
||||||
|
AddCode("//exp tex flag is set");
|
||||||
|
code = "((" + code + "- 0.5) * 2.)";
|
||||||
|
}
|
||||||
|
|
||||||
if (dst.saturate)
|
if (dst.saturate)
|
||||||
code = saturate(code);
|
code = saturate(code);
|
||||||
|
else
|
||||||
|
code = ClampValue(code, dst.prec);
|
||||||
}
|
}
|
||||||
|
|
||||||
code += (append_mask ? "$m" : "");
|
code += (append_mask ? "$m" : "");
|
||||||
|
@ -188,20 +195,13 @@ std::string FragmentProgramDecompiler::NotZeroPositive(const std::string& code)
|
||||||
return "max(abs(" + code + "), 1.E-10)";
|
return "max(abs(" + code + "), 1.E-10)";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string FragmentProgramDecompiler::NoOverflow(const std::string& code)
|
std::string FragmentProgramDecompiler::ClampValue(const std::string& code, u32 precision)
|
||||||
{
|
{
|
||||||
//FP16 is expected to overflow a lot more easily, beyond +/-65504
|
//FP16 is expected to overflow a lot more easily, beyond +/-65504
|
||||||
//FP32 can still work up to +/-3.4E38
|
//FP32 can still work up to +/-3.4E38
|
||||||
//See http://http.download.nvidia.com/developer/Papers/2005/FP_Specials/FP_Specials.pdf
|
//See http://http.download.nvidia.com/developer/Papers/2005/FP_Specials/FP_Specials.pdf
|
||||||
|
|
||||||
if (dst.exp_tex)
|
switch (precision)
|
||||||
{
|
|
||||||
//If dst.exp_tex really is _bx2 postfix, we need to unpack dynamic range
|
|
||||||
AddCode("//exp tex flag is set");
|
|
||||||
return "((" + code + "- 0.5) * 2.)";
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (dst.prec)
|
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
|
@ -209,6 +209,10 @@ std::string FragmentProgramDecompiler::NoOverflow(const std::string& code)
|
||||||
return "clamp(" + code + ", -65504., 65504.)";
|
return "clamp(" + code + ", -65504., 65504.)";
|
||||||
case 2:
|
case 2:
|
||||||
return "clamp(" + code + ", -2., 2.)";
|
return "clamp(" + code + ", -2., 2.)";
|
||||||
|
case 3:
|
||||||
|
return "clamp(" + code + ", -1., 1.)";
|
||||||
|
case 4:
|
||||||
|
return "clamp(" + code + ", 0., 1.)";
|
||||||
}
|
}
|
||||||
|
|
||||||
return code;
|
return code;
|
||||||
|
@ -396,6 +400,7 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
|
||||||
|
|
||||||
if (src.abs) ret = "abs(" + ret + ")";
|
if (src.abs) ret = "abs(" + ret + ")";
|
||||||
if (src.neg) ret = "-" + ret;
|
if (src.neg) ret = "-" + ret;
|
||||||
|
if (src1.input_prec_mod) ret = ClampValue(ret, src1.input_prec_mod);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,8 +55,8 @@ class FragmentProgramDecompiler
|
||||||
std::string NotZero(const std::string& code);
|
std::string NotZero(const std::string& code);
|
||||||
std::string NotZeroPositive(const std::string& code);
|
std::string NotZeroPositive(const std::string& code);
|
||||||
|
|
||||||
//Prevents operations from overflowing the max range (tested with fp_dynamic3 autotest sample)
|
//Prevents operations from overflowing the desired range (tested with fp_dynamic3 autotest sample, DS2 for src1.input_prec_mod)
|
||||||
std::string NoOverflow(const std::string& code);
|
std::string ClampValue(const std::string& code, u32 precision);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the dst set is not a vector (i.e only a single component)
|
* Returns true if the dst set is not a vector (i.e only a single component)
|
||||||
|
|
|
@ -147,7 +147,7 @@ union SRC1
|
||||||
u32 swizzle_w : 2;
|
u32 swizzle_w : 2;
|
||||||
u32 neg : 1;
|
u32 neg : 1;
|
||||||
u32 abs : 1;
|
u32 abs : 1;
|
||||||
u32 input_mod_src0 : 3;
|
u32 input_prec_mod : 3; // Looks to be a precision clamping modifier affecting all inputs (tested with Dark Souls II)
|
||||||
u32 : 6;
|
u32 : 6;
|
||||||
u32 scale : 3;
|
u32 scale : 3;
|
||||||
u32 opcode_is_branch : 1;
|
u32 opcode_is_branch : 1;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue