mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-14 18:58:36 +12:00
SPU LLVM: Use clamping helpers for FMA32x4 and FM
This commit is contained in:
parent
66d13da2ac
commit
c601374b1f
1 changed file with 5 additions and 9 deletions
|
@@ -7281,7 +7281,7 @@ public:
|
|||
|
||||
// Runs the f32[4] value v through both one-sided clamp helpers and returns the
// evaluated result.
// NOTE(review): clamp_positive_smax / clamp_negative_smax are defined outside this
// excerpt; presumably they clamp to the largest-magnitude finite value on each
// side - confirm against their definitions.
//
// This listing is a rendered diff whose +/- markers were lost. The hunk header
// reports 7 lines on each side, so exactly one line is removed and one added:
// the first `return` is the pre-commit line, the second is its replacement.
value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
|
||||
{
|
||||
// Removed by this commit: positive clamp applied first, negative clamp outermost.
return eval(clamp_negative_smax(clamp_positive_smax(v)));
|
||||
// Added by this commit: nesting swapped, negative clamp first, positive clamp outermost.
return eval(clamp_positive_smax(clamp_negative_smax(v)));
|
||||
}
|
||||
|
||||
// FMA favouring zeros
|
||||
|
@@ -7457,10 +7457,8 @@ public:
|
|||
{
|
||||
const auto a = get_vr<f32[4]>(op.ra);
|
||||
const auto b = get_vr<f32[4]>(op.rb);
|
||||
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
|
||||
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
|
||||
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
|
||||
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
|
||||
const auto ca = eval(clamp_smax(a));
|
||||
const auto cb = eval(clamp_smax(b));
|
||||
set_vr(op.rt, ca * cb);
|
||||
}
|
||||
else
|
||||
|
@@ -7527,10 +7525,8 @@ public:
|
|||
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
|
||||
{
|
||||
value_t<f32[4]> r;
|
||||
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
|
||||
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
|
||||
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
|
||||
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
|
||||
const auto ca = eval(clamp_smax(a));
|
||||
const auto cb = eval(clamp_smax(b));
|
||||
|
||||
// Optimization: Emit only a floating multiply if the addend is zero
|
||||
// This is odd since SPU code could just use the FM instruction, but it seems common enough
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue