mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 06:21:26 +12:00
SPU LLVM: Improve approx FCGT (#8728)
This commit is contained in:
parent
01d3585bf3
commit
995cb8125e
1 changed files with 33 additions and 43 deletions
|
@ -7552,63 +7552,53 @@ public:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto a = get_vr<f32[4]>(op.ra);
|
const auto [a, b] = get_vrs<f32[4]>(op.ra, op.rb);
|
||||||
const auto b = get_vr<f32[4]>(op.rb);
|
const value_t<f32[4]> ab[2]{a, b};
|
||||||
|
|
||||||
if (auto [ok, data] = get_const_vector(b.value, m_pos, 5000); ok)
|
std::bitset<2> safe_int_compare(0);
|
||||||
|
std::bitset<2> safe_nonzero_compare(0);
|
||||||
|
|
||||||
|
for (u32 i = 0; i < 2; i++)
|
||||||
{
|
{
|
||||||
bool safe_int_compare = true;
|
if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, 5000); ok)
|
||||||
|
|
||||||
for (u32 i = 0; i < 4; i++)
|
|
||||||
{
|
{
|
||||||
const u32 exponent = data._u32[i] & 0x7f800000u;
|
safe_int_compare.set(i);
|
||||||
|
safe_nonzero_compare.set(i);
|
||||||
|
|
||||||
if (data._u32[i] >= 0x7f7fffffu || !exponent)
|
for (u32 j = 0; j < 4; j++)
|
||||||
{
|
{
|
||||||
// Positive or negative zero, Denormal (treated as zero), Negative constant, or Normalized number with exponent +127
|
const u32 value = data._u32[j];
|
||||||
// Cannot use signed integer compare safely
|
const u8 exponent = static_cast<u8>(value >> 23);
|
||||||
// Note: Technically this optimization is accurate for any positive value, but due to the fact that
|
|
||||||
// we don't produce "extended range" values the same way as real hardware, it's not safe to apply
|
|
||||||
// this optimization for values outside of the range of x86 floating point hardware.
|
|
||||||
safe_int_compare = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (safe_int_compare)
|
if (value >= 0x7f7fffffu || !exponent)
|
||||||
{
|
{
|
||||||
set_vr(op.rt, sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
|
// Positive or negative zero, Denormal (treated as zero), Negative constant, or Normalized number with exponent +127
|
||||||
return;
|
// Cannot use signed integer compare safely
|
||||||
|
// Note: Technically this optimization is accurate for any positive value, but due to the fact that
|
||||||
|
// we don't produce "extended range" values the same way as real hardware, it's not safe to apply
|
||||||
|
// this optimization for values outside of the range of x86 floating point hardware.
|
||||||
|
safe_int_compare.reset(i);
|
||||||
|
if (!exponent) safe_nonzero_compare.reset(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto [ok, data] = get_const_vector(a.value, m_pos, 5000); ok)
|
if (safe_int_compare.any())
|
||||||
{
|
{
|
||||||
bool safe_int_compare = true;
|
set_vr(op.rt, sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
|
||||||
|
return;
|
||||||
for (u32 i = 0; i < 4; i++)
|
|
||||||
{
|
|
||||||
const u32 exponent = data._u32[i] & 0x7f800000u;
|
|
||||||
|
|
||||||
if (data._u32[i] >= 0x7f7fffffu || !exponent)
|
|
||||||
{
|
|
||||||
// See above
|
|
||||||
safe_int_compare = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (safe_int_compare)
|
|
||||||
{
|
|
||||||
set_vr(op.rt, sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.core.spu_approx_xfloat)
|
if (g_cfg.core.spu_approx_xfloat)
|
||||||
{
|
{
|
||||||
const auto ca = eval(clamp_positive_smax(a));
|
const auto ai = eval(bitcast<s32[4]>(a));
|
||||||
const auto cb = eval(clamp_negative_smax(b));
|
const auto bi = eval(bitcast<s32[4]>(b));
|
||||||
set_vr(op.rt, sext<s32[4]>(fcmp_ord(ca > cb)));
|
|
||||||
|
if (!safe_nonzero_compare.any())
|
||||||
|
set_vr(op.rt, sext<s32[4]>(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi)));
|
||||||
|
else
|
||||||
|
set_vr(op.rt, sext<s32[4]>(select((ai & bi) >= 0, ai > bi, ai < bi)));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue