mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-13 10:18:40 +12:00
PPU LLVM: Fix VMAXFP, VMINFP NaN handling
This commit is contained in:
parent
17f965c171
commit
6a51c27fde
2 changed files with 30 additions and 8 deletions
|
@ -2744,22 +2744,44 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Support doubles
|
// TODO: Support doubles
|
||||||
auto fre(value_t<f32[4]> a)
|
template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||||
|
auto fre(T a)
|
||||||
{
|
{
|
||||||
decltype(a) result;
|
value_t<typename T::type> result;
|
||||||
const auto av = a.eval(m_ir);
|
const auto av = a.eval(m_ir);
|
||||||
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", av->getType(), av->getType()).getCallee(), {av});
|
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", av->getType(), av->getType()).getCallee(), {av});
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto frsqe(value_t<f32[4]> a)
|
template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||||
|
auto frsqe(T a)
|
||||||
{
|
{
|
||||||
decltype(a) result;
|
value_t<typename T::type> result;
|
||||||
const auto av = a.eval(m_ir);
|
const auto av = a.eval(m_ir);
|
||||||
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", av->getType(), av->getType()).getCallee(), {av});
|
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", av->getType(), av->getType()).getCallee(), {av});
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||||
|
auto fmax(T a, U b)
|
||||||
|
{
|
||||||
|
value_t<typename T::type> result;
|
||||||
|
const auto av = a.eval(m_ir);
|
||||||
|
const auto bv = b.eval(m_ir);
|
||||||
|
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.max.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||||
|
auto fmin(T a, U b)
|
||||||
|
{
|
||||||
|
value_t<typename T::type> result;
|
||||||
|
const auto av = a.eval(m_ir);
|
||||||
|
const auto bv = b.eval(m_ir);
|
||||||
|
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.min.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2>
|
template <typename T1, typename T2>
|
||||||
value_t<u8[16]> pshufb(T1 a, T2 b)
|
value_t<u8[16]> pshufb(T1 a, T2 b)
|
||||||
{
|
{
|
||||||
|
|
|
@ -973,8 +973,8 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
|
||||||
|
|
||||||
void PPUTranslator::VMAXFP(ppu_opcode_t op)
|
void PPUTranslator::VMAXFP(ppu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
|
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
|
||||||
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])));
|
set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmin(a, b)) == bitcast<u32[4]>(a), b, a)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::VMAXSB(ppu_opcode_t op)
|
void PPUTranslator::VMAXSB(ppu_opcode_t op)
|
||||||
|
@ -1045,8 +1045,8 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op)
|
||||||
|
|
||||||
void PPUTranslator::VMINFP(ppu_opcode_t op)
|
void PPUTranslator::VMINFP(ppu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
|
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
|
||||||
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])));
|
set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmax(a, b)) == bitcast<u32[4]>(a), b, a)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::VMINSB(ppu_opcode_t op)
|
void PPUTranslator::VMINSB(ppu_opcode_t op)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue