SPU LLVM: Use clamping helpers for FMA32x4 and FM

This commit is contained in:
Malcolm Jestadt 2020-05-31 21:40:48 -04:00 committed by Ivan
parent 66d13da2ac
commit c601374b1f

View file

@@ -7281,7 +7281,7 @@ public:
value_t<f32[4]> clamp_smax(value_t<f32[4]> v) value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
{ {
return eval(clamp_negative_smax(clamp_positive_smax(v))); return eval(clamp_positive_smax(clamp_negative_smax(v)));
} }
// FMA favouring zeros // FMA favouring zeros
@@ -7457,10 +7457,8 @@ public:
{ {
const auto a = get_vr<f32[4]>(op.ra); const auto a = get_vr<f32[4]>(op.ra);
const auto b = get_vr<f32[4]>(op.rb); const auto b = get_vr<f32[4]>(op.rb);
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)))); const auto ca = eval(clamp_smax(a));
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)))); const auto cb = eval(clamp_smax(b));
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
set_vr(op.rt, ca * cb); set_vr(op.rt, ca * cb);
} }
else else
@@ -7527,10 +7525,8 @@ public:
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c) value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
{ {
value_t<f32[4]> r; value_t<f32[4]> r;
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)))); const auto ca = eval(clamp_smax(a));
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)))); const auto cb = eval(clamp_smax(b));
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
// Optimization: Emit only a floating multiply if the addend is zero // Optimization: Emit only a floating multiply if the addend is zero
// This is odd since SPU code could just use the FM instruction, but it seems common enough // This is odd since SPU code could just use the FM instruction, but it seems common enough