mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-04 05:51:27 +12:00
Implement v128::fma32f
This commit is contained in:
parent
ebbf329b6a
commit
3b8e7d0967
3 changed files with 37 additions and 12 deletions
|
@ -3,6 +3,7 @@
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "util/endian.hpp"
|
#include "util/endian.hpp"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#if __has_include(<bit>)
|
#if __has_include(<bit>)
|
||||||
#include <bit>
|
#include <bit>
|
||||||
|
@ -322,6 +323,36 @@ union alignas(16) v128
|
||||||
return fromD(_mm_cmpeq_pd(left.vd, right.vd));
|
return fromD(_mm_cmpeq_pd(left.vd, right.vd));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run-time switch read by fma32f: when true, the hardware FMA path is taken.
// Defaults to false; main() assigns it from utils::has_fma3().
static inline bool use_fma = false;
|
||||||
|
|
||||||
|
// Computes a * b + c for each of the four 32-bit float lanes and returns the result.
// `a` is taken by value and reused as the output; `b` and `c` are only read.
static inline v128 fma32f(v128 a, const v128& b, const v128& c)
|
||||||
|
{
|
||||||
|
// __FMA__ is not defined: pick the implementation at run time via use_fma.
#ifndef __FMA__
|
||||||
|
if (use_fma) [[likely]]
|
||||||
|
{
|
||||||
|
// _MSC_VER defined: call the FMA intrinsic directly.
#ifdef _MSC_VER
|
||||||
|
a.vf = _mm_fmadd_ps(a.vf, b.vf, c.vf);
|
||||||
|
return a;
|
||||||
|
#else
|
||||||
|
// Otherwise emit vfmadd213ps through inline asm; a.vf is both input and output ("+x").
// NOTE(review): presumably the intrinsic is unavailable here because FMA is not enabled
// for this translation unit - confirm.
__asm__("vfmadd213ps %[c], %[b], %[a]"
|
||||||
|
: [a] "+x" (a.vf)
|
||||||
|
: [b] "x" (b.vf)
|
||||||
|
, [c] "x" (c.vf));
|
||||||
|
return a;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// use_fma is false: fall back to std::fmaf on each lane.
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
a._f[i] = std::fmaf(a._f[i], b._f[i], c._f[i]);
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
// __FMA__ is defined: the intrinsic is used unconditionally, no run-time check.
#else
|
||||||
|
a.vf = _mm_fmadd_ps(a.vf, b.vf, c.vf);
|
||||||
|
return a;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
bool operator==(const v128& right) const
|
bool operator==(const v128& right) const
|
||||||
{
|
{
|
||||||
return _u64[0] == right._u64[0] && _u64[1] == right._u64[1];
|
return _u64[0] == right._u64[0] && _u64[1] == right._u64[1];
|
||||||
|
|
|
@ -959,9 +959,9 @@ bool ppu_interpreter::VLOGEFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||||
bool ppu_interpreter_fast::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
|
bool ppu_interpreter_fast::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto a = ppu.vr[op.va].vf;
|
const auto a = ppu.vr[op.va].vf;
|
||||||
const auto b = ppu.vr[op.vc].vf;
|
const auto b = ppu.vr[op.vb].vf;
|
||||||
const auto c = ppu.vr[op.vb].vf;
|
const auto c = ppu.vr[op.vc].vf;
|
||||||
const auto result = _mm_add_ps(_mm_mul_ps(a, b), c);
|
const auto result = _mm_add_ps(_mm_mul_ps(a, c), b);
|
||||||
ppu.vr[op.vd] = vec_handle_nan(result);
|
ppu.vr[op.vd] = vec_handle_nan(result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -971,15 +971,7 @@ bool ppu_interpreter_precise::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||||
const auto a = ppu.vr[op.va];
|
const auto a = ppu.vr[op.va];
|
||||||
const auto b = ppu.vr[op.vb];
|
const auto b = ppu.vr[op.vb];
|
||||||
const auto c = ppu.vr[op.vc];
|
const auto c = ppu.vr[op.vc];
|
||||||
v128 d;
|
ppu.vr[op.rd] = vec_handle_nan(v128::fma32f(a, c, b), a, b, c);
|
||||||
|
|
||||||
// TODO: Optimize
|
|
||||||
for (u32 i = 0; i < 4; i++)
|
|
||||||
{
|
|
||||||
d._f[i] = f32(f64{a._f[i]} * f64{c._f[i]} + f64{b._f[i]});
|
|
||||||
}
|
|
||||||
|
|
||||||
ppu.vr[op.rd] = vec_handle_nan(d, a, b, c);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -268,6 +268,8 @@ int main(int argc, char** argv)
|
||||||
const u64 intro_time = (intro_stats.ru_utime.tv_sec + intro_stats.ru_stime.tv_sec) * 1000000000ull + (intro_stats.ru_utime.tv_usec + intro_stats.ru_stime.tv_usec) * 1000ull;
|
const u64 intro_time = (intro_stats.ru_utime.tv_sec + intro_stats.ru_stime.tv_sec) * 1000000000ull + (intro_stats.ru_utime.tv_usec + intro_stats.ru_stime.tv_usec) * 1000ull;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
v128::use_fma = utils::has_fma3();
|
||||||
|
|
||||||
s_argv0 = argv[0]; // Save for report_fatal_error
|
s_argv0 = argv[0]; // Save for report_fatal_error
|
||||||
|
|
||||||
// Only run RPCS3 to display an error
|
// Only run RPCS3 to display an error
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue