SPU: Fix FREST

This commit is contained in:
Eladash 2019-11-15 17:58:41 +02:00 committed by Ivan
parent 9b34f00241
commit c9b0f0e734
3 changed files with 45 additions and 13 deletions

View file

@ -1,4 +1,4 @@
#pragma once #pragma once
#ifdef LLVM_AVAILABLE #ifdef LLVM_AVAILABLE
@ -2729,6 +2729,23 @@ public:
return result; return result;
} }
// TODO: Support doubles
// Emits a call to the x86 SSE intrinsic "llvm.x86.sse.rcp.ps" on the evaluated
// 4 x f32 operand and returns it wrapped in a value of the same type as `a`.
// RCPPS produces an approximate per-lane reciprocal, not an exact 1/x.
auto fre(value_t<f32[4]> a)
{
	const auto operand = a.eval(m_ir);
	const auto vec_type = operand->getType();

	// The intrinsic's return type and its single parameter are both the operand's vector type.
	const auto intrinsic = m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", vec_type, vec_type).getCallee();

	value_t<f32[4]> estimate;
	estimate.value = m_ir->CreateCall(intrinsic, {operand});
	return estimate;
}
// Emits a call to the x86 SSE intrinsic "llvm.x86.sse.rsqrt.ps" on the evaluated
// 4 x f32 operand and returns it wrapped in a value of the same type as `a`.
// RSQRTPS produces an approximate per-lane reciprocal square root.
auto frsqe(value_t<f32[4]> a)
{
	const auto operand = a.eval(m_ir);
	const auto vec_type = operand->getType();

	// The intrinsic's return type and its single parameter are both the operand's vector type.
	const auto intrinsic = m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", vec_type, vec_type).getCallee();

	value_t<f32[4]> estimate;
	estimate.value = m_ir->CreateCall(intrinsic, {operand});
	return estimate;
}
template <typename T1, typename T2> template <typename T1, typename T2>
value_t<u8[16]> pshufb(T1 a, T2 b) value_t<u8[16]> pshufb(T1 a, T2 b)
{ {

View file

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "SPUInterpreter.h" #include "SPUInterpreter.h"
#include "Emu/System.h" #include "Emu/System.h"
@ -1903,21 +1903,29 @@ inline bool isdenormal(double x)
// Precise-interpreter handler for FREST: writes a per-lane reciprocal estimate of
// register op.ra into register op.rt and always returns true.
// NOTE(review): this span looks like a side-by-side diff rendering — where a line
// carries two statements, the first appears to be the pre-change text and the second
// the post-change text. Confirm against the real file before editing any code here.
bool spu_interpreter_precise::FREST(spu_thread& spu, spu_opcode_t op) bool spu_interpreter_precise::FREST(spu_thread& spu, spu_opcode_t op)
{ {
// Switch the floating-point rounding mode to round-toward-zero; it is not restored in this function.
fesetround(FE_TOWARDZERO); fesetround(FE_TOWARDZERO);
// Post-change: copy the source register once and seed all four lanes with the
// SSE approximate reciprocal (_mm_rcp_ps); the loop below only patches special lanes.
const auto ra = spu.gpr[op.ra];
auto res = v128::fromF(_mm_rcp_ps(ra.vf));
// Visit each of the four f32 lanes.
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
const float a = spu.gpr[op.ra]._f[i]; const auto a = ra._f[i];
// Dispatch on fexpf(a) — presumably the lane's biased exponent field; confirm against its definition.
float result; switch (fexpf(a))
if (fexpf(a) == 0) {
// fexpf(a) == 0: flag divide-by-zero for this lane and store extended(sign of a, 0x7FFFFF).
// NOTE(review): extended(...) presumably builds the largest-magnitude value with that sign — confirm.
case 0:
{ {
spu.fpscr.setDivideByZeroFlag(i); spu.fpscr.setDivideByZeroFlag(i);
result = extended(std::signbit(a), 0x7FFFFF); res._f[i] = extended(std::signbit(a), 0x7FFFFF);
break;
} }
// Pre-change: any isextended(a) lane produced 0.0f.
// Post-change: only fexpf(a) == 0xFD (0x7e800000 >> 23) is forced to 0.0f.
// NOTE(review): exponents 0xFE/0xFF are left to _mm_rcp_ps here, while the recompiler
// hunk zeroes every lane whose magnitude bits exceed 0x7e7fffff — verify the two agree.
else if (isextended(a)) case (0x7e800000 >> 23): // Special case for value not handled properly in rcpps
result = 0.0f; {
else res._f[i] = 0.0f;
// Pre-change fallback computed 1 / a; post-change keeps the _mm_rcp_ps seed for unmatched lanes.
result = 1 / a; break;
spu.gpr[op.rt]._f[i] = result;
} }
default: break;
}
}
// Post-change: commit all four lanes to the destination register with a single assignment.
spu.gpr[op.rt] = res;
// Always returns true.
return true; return true;
} }

View file

@ -7216,9 +7216,16 @@ public:
{ {
// TODO // TODO
if (g_cfg.core.spu_accurate_xfloat) if (g_cfg.core.spu_accurate_xfloat)
set_vr(op.rt, fsplat<f64[4]>(1.0) / get_vr<f64[4]>(op.ra)); {
const auto a = get_vr<f32[4]>(op.ra);
const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
const auto mask_de = eval(noncast<u32[4]>(sext<s32[4]>(fcmp_uno(a == fsplat<f32[4]>(0.)))) >> 1);
set_vr(op.rt, (bitcast<s32[4]>(fre(a)) & ~mask_ov) | noncast<s32[4]>(mask_de));
}
else else
set_vr(op.rt, fsplat<f32[4]>(1.0) / get_vr<f32[4]>(op.ra)); {
set_vr(op.rt, fre(get_vr<f32[4]>(op.ra)));
}
} }
void FRSQEST(spu_opcode_t op) void FRSQEST(spu_opcode_t op)