SPU LLVM: Add accurate double-precision FMA support

This commit is contained in:
Eladash 2020-04-03 08:11:47 +03:00 committed by Ivan
parent 1b68f90e42
commit 158b24ec25
9 changed files with 62 additions and 26 deletions

View file

@ -1628,7 +1628,7 @@ extern void ppu_initialize(const ppu_module& info)
#ifndef _WIN32 #ifndef _WIN32
settings += ppu_settings::non_win32; settings += ppu_settings::non_win32;
#endif #endif
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
settings += ppu_settings::accurate_fma; settings += ppu_settings::accurate_fma;
} }

View file

@ -3881,7 +3881,7 @@ void PPUTranslator::FMADDS(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b}); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
} }
@ -3909,7 +3909,7 @@ void PPUTranslator::FMSUBS(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
} }
@ -3937,7 +3937,7 @@ void PPUTranslator::FNMSUBS(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
} }
@ -3965,7 +3965,7 @@ void PPUTranslator::FNMADDS(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b}); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
} }
@ -4225,7 +4225,7 @@ void PPUTranslator::FMSUB(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
} }
@ -4253,7 +4253,7 @@ void PPUTranslator::FMADD(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), { a, c, b }); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), { a, c, b });
} }
@ -4281,7 +4281,7 @@ void PPUTranslator::FNMSUB(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
} }
@ -4309,7 +4309,7 @@ void PPUTranslator::FNMADD(ppu_opcode_t op)
const auto c = GetFpr(op.frc); const auto c = GetFpr(op.frc);
llvm::Value* result; llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma) if (g_cfg.core.llvm_accurate_dfma)
{ {
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b}); result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
} }

View file

@ -7192,22 +7192,58 @@ public:
// DFMA: stores ra * rb + rt into rt, operating on f64[2] register values.
// When g_cfg.core.llvm_accurate_dfma is set, the product and sum are emitted
// as one call to the llvm::Intrinsic::fma intrinsic; otherwise they are
// emitted as a separate multiply and add (a * b + c).
void DFMA(spu_opcode_t op) void DFMA(spu_opcode_t op)
{ {
set_vr(op.rt, get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb) + get_vr<f64[2]>(op.rt)); const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
value_t<f64[2]> r;
// Operand order is (multiplicand, multiplier, addend) = (ra, rb, rt).
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
set_vr(op.rt, r);
}
else
set_vr(op.rt, a * b + c);
} }
// DFMS: stores ra * rb - rt into rt, operating on f64[2] register values.
// When g_cfg.core.llvm_accurate_dfma is set, the operation is emitted as one
// call to the llvm::Intrinsic::fma intrinsic; otherwise it is emitted as a
// separate multiply and subtract (a * b - c).
void DFMS(spu_opcode_t op) void DFMS(spu_opcode_t op)
{ {
set_vr(op.rt, get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb) - get_vr<f64[2]>(op.rt)); const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
value_t<f64[2]> r;
// The subtraction is expressed by negating the addend: fma(a, b, -c).
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, eval(-c).value});
set_vr(op.rt, r);
}
else
set_vr(op.rt, a * b - c);
} }
// DFNMS: stores rt - ra * rb into rt, operating on f64[2] register values.
// When g_cfg.core.llvm_accurate_dfma is set, the operation is emitted as one
// call to the llvm::Intrinsic::fma intrinsic; otherwise it is emitted as a
// separate multiply and subtract (c - (a * b)).
void DFNMS(spu_opcode_t op) void DFNMS(spu_opcode_t op)
{ {
set_vr(op.rt, get_vr<f64[2]>(op.rt) - get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb)); const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
value_t<f64[2]> r;
// Negating the multiplicand gives fma(-a, b, c), i.e. c - a * b.
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {eval(-a).value, b.value, c.value});
set_vr(op.rt, r);
}
else
set_vr(op.rt, c - (a * b));
} }
// DFNMA: stores -(ra * rb + rt) into rt, operating on f64[2] register values.
// When g_cfg.core.llvm_accurate_dfma is set, the multiply-add is emitted as
// one call to the llvm::Intrinsic::fma intrinsic and the result is negated;
// otherwise it is emitted as a separate multiply, add and negate.
void DFNMA(spu_opcode_t op) void DFNMA(spu_opcode_t op)
{ {
set_vr(op.rt, -(get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb) + get_vr<f64[2]>(op.rt))); const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
value_t<f64[2]> r;
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
// The negation is applied to the fused result, not to the operands.
set_vr(op.rt, -r);
}
else
set_vr(op.rt, -(a * b + c));
} }
// clamping helpers // clamping helpers

View file

@ -45,7 +45,7 @@ struct cfg_root : cfg::node
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", tsx_usage::enabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", tsx_usage::enabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false }; cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true }; cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
cfg::_bool ppu_accurate_fma{ this, "PPU Accurate FMA", true }; // Enable accurate FMA for CPUs which do not support it natively (can't be disabled for CPUs which do support it) cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
cfg::_bool debug_console_mode{ this, "Debug Console Mode", false }; // Debug console emulation, not recommended cfg::_bool debug_console_mode{ this, "Debug Console Mode", false }; // Debug console emulation, not recommended
cfg::_enum<lib_loading_type> lib_loading{ this, "Lib Loader", lib_loading_type::liblv2only }; cfg::_enum<lib_loading_type> lib_loading{ this, "Lib Loader", lib_loading_type::liblv2only };

View file

@ -111,7 +111,7 @@ private:
{ emu_settings_type::EnableTSX, { "Core", "Enable TSX"}}, { emu_settings_type::EnableTSX, { "Core", "Enable TSX"}},
{ emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}}, { emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}},
{ emu_settings_type::AccuratePUTLLUC, { "Core", "Accurate PUTLLUC"}}, { emu_settings_type::AccuratePUTLLUC, { "Core", "Accurate PUTLLUC"}},
{ emu_settings_type::AccuratePPUfma, { "Core", "PPU Accurate FMA"}}, { emu_settings_type::AccurateLLVMdfma, { "Core", "LLVM Accurate DFMA"}},
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}}, { emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}}, { emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}}, { emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},

View file

@ -17,7 +17,7 @@ enum class emu_settings_type
EnableTSX, EnableTSX,
AccurateGETLLAR, AccurateGETLLAR,
AccuratePUTLLUC, AccuratePUTLLUC,
AccuratePPUfma, AccurateLLVMdfma,
AccurateRSXAccess, AccurateRSXAccess,
AccurateXFloat, AccurateXFloat,
SetDAZandFTZ, SetDAZandFTZ,

View file

@ -931,8 +931,8 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceCheckBox(ui->debugConsoleMode, emu_settings_type::DebugConsoleMode); m_emu_settings->EnhanceCheckBox(ui->debugConsoleMode, emu_settings_type::DebugConsoleMode);
SubscribeTooltip(ui->debugConsoleMode, tooltips.settings.debug_console_mode); SubscribeTooltip(ui->debugConsoleMode, tooltips.settings.debug_console_mode);
m_emu_settings->EnhanceCheckBox(ui->accuratePPUfma, emu_settings_type::AccuratePPUfma); m_emu_settings->EnhanceCheckBox(ui->accurateLLVMdfma, emu_settings_type::AccurateLLVMdfma);
SubscribeTooltip(ui->accuratePPUfma, tooltips.settings.accurate_ppu_fma); SubscribeTooltip(ui->accurateLLVMdfma, tooltips.settings.accurate_llvm_dfma);
m_emu_settings->EnhanceCheckBox(ui->silenceAllLogs, emu_settings_type::SilenceAllLogs); m_emu_settings->EnhanceCheckBox(ui->silenceAllLogs, emu_settings_type::SilenceAllLogs);
SubscribeTooltip(ui->silenceAllLogs, tooltips.settings.silence_all_logs); SubscribeTooltip(ui->silenceAllLogs, tooltips.settings.silence_all_logs);

View file

@ -1717,13 +1717,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item> <item>
<widget class="QCheckBox" name="accuratePPUfma"> <widget class="QCheckBox" name="accurateLLVMdfma">
<property name="text"> <property name="text">
<string>Accurate PPU FMA</string> <string>Accurate LLVM DFMA</string>
</property> </property>
</widget> </widget>
</item> </item>
<item> <item>
<widget class="QCheckBox" name="silenceAllLogs"> <widget class="QCheckBox" name="silenceAllLogs">
<property name="text"> <property name="text">

View file

@ -78,7 +78,7 @@ public:
const QString set_daz_and_ftz = tr("Never use this."); const QString set_daz_and_ftz = tr("Never use this.");
const QString accurate_getllar = tr("Never use this."); const QString accurate_getllar = tr("Never use this.");
const QString accurate_putlluc = tr("Never use this."); const QString accurate_putlluc = tr("Never use this.");
const QString accurate_ppu_fma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support."); const QString accurate_llvm_dfma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support.");
const QString accurate_rsx_access = tr("Never use this."); const QString accurate_rsx_access = tr("Never use this.");
const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental."); const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental.");
const QString gl_legacy_buffers = tr("Enables use of classic OpenGL buffers which allows capturing tools to work with RPCS3 e.g RenderDoc.\nIf unsure, don't use this option."); const QString gl_legacy_buffers = tr("Enables use of classic OpenGL buffers which allows capturing tools to work with RPCS3 e.g RenderDoc.\nIf unsure, don't use this option.");