SPU LLVM: Use clamping helpers for FMA32x4 and FM

This commit is contained in:
Malcolm Jestadt 2020-05-31 21:40:48 -04:00 committed by Ivan
parent 66d13da2ac
commit c601374b1f

View file

@ -7281,7 +7281,7 @@ public:
// Applies both the clamp_positive_smax and clamp_negative_smax helpers to `v`
// and returns the eval()'d result as a f32[4] value.
// NOTE(review): this text comes from a diff hunk whose +/- markers appear to
// have been lost. The two return statements below differ only in the order in
// which the two helpers are nested and are presumably the removed and added
// versions of a single line — confirm against the real file. Read literally,
// only the first return is reachable.
value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
{
return eval(clamp_negative_smax(clamp_positive_smax(v)));
return eval(clamp_positive_smax(clamp_negative_smax(v)));
}
// FMA favouring zeros
@ -7457,10 +7457,8 @@ public:
{
const auto a = get_vr<f32[4]>(op.ra);
const auto b = get_vr<f32[4]>(op.rb);
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
const auto ca = eval(clamp_smax(a));
const auto cb = eval(clamp_smax(b));
set_vr(op.rt, ca * cb);
}
else
@ -7527,10 +7525,8 @@ public:
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
{
value_t<f32[4]> r;
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
const auto ca = eval(clamp_smax(a));
const auto cb = eval(clamp_smax(b));
// Optimization: Emit only a floating multiply if the addend is zero
// This is odd since SPU code could just use the FM instruction, but it seems common enough