SPU LLVM: Use clamping helpers for FMA32x4 and FM

2025-07-14 18:58:36 +12:00 · 2020-05-31 21:40:48 -04:00 · 2020-05-31 21:40:48 -04:00 · c601374b1f
commit c601374b1f
parent 66d13da2ac
1 changed file with 5 additions and 9 deletions
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -7281,7 +7281,7 @@ public:

 	value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
 	{
-		return eval(clamp_negative_smax(clamp_positive_smax(v)));
+		return eval(clamp_positive_smax(clamp_negative_smax(v)));
 	}

 	// FMA favouring zeros
@@ -7457,10 +7457,8 @@ public:
 		{
 			const auto a = get_vr<f32[4]>(op.ra);
 			const auto b = get_vr<f32[4]>(op.rb);
-			const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
-			const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
-			const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
-			const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
+			const auto ca = eval(clamp_smax(a));
+			const auto cb = eval(clamp_smax(b));
 			set_vr(op.rt, ca * cb);
 		}
 		else
@@ -7527,10 +7525,8 @@ public:
 	value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
 	{
 		value_t<f32[4]> r;
-		const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
-		const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
-		const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
-		const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
+		const auto ca = eval(clamp_smax(a));
+		const auto cb = eval(clamp_smax(b));

 		// Optimization: Emit only a floating multiply if the addend is zero
 		// This is odd since SPU code could just use the FM instruction, but it seems common enough