mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-13 02:08:49 +12:00
rsx: Use native half float types if available
- Emulating f16 with f32 is not ideal and requires a lot of value clamping - Using native data type can significantly improve performance and accuracy - With openGL, check for the compatible extensions NV_gpu_shader5 and AMD_gpu_shader_half_float - With Vulkan, enable this functionality in the deviceFeatures if applicable. (VK_KHR_shader_float16_int8 extension) - Temporarily disable hw fp16 for vulkan
This commit is contained in:
parent
ee319f7c13
commit
a668560c68
14 changed files with 435 additions and 236 deletions
|
@ -2,8 +2,8 @@
|
|||
#include <set>
|
||||
#include "Emu/Memory/vm.h"
|
||||
#include "Emu/System.h"
|
||||
#include "GLHelpers.h"
|
||||
#include "GLFragmentProgram.h"
|
||||
#include "../Common/ProgramStateCache.h"
|
||||
#include "GLCommonDecompiler.h"
|
||||
#include "../GCM.h"
|
||||
|
||||
|
@ -13,6 +13,11 @@ std::string GLFragmentDecompilerThread::getFloatTypeName(size_t elementCount)
|
|||
return glsl::getFloatTypeNameImpl(elementCount);
|
||||
}
|
||||
|
||||
std::string GLFragmentDecompilerThread::getHalfTypeName(size_t elementCount)
|
||||
{
|
||||
return glsl::getHalfTypeNameImpl(elementCount);
|
||||
}
|
||||
|
||||
std::string GLFragmentDecompilerThread::getFunction(FUNCTION f)
|
||||
{
|
||||
return glsl::getFunctionImpl(f);
|
||||
|
@ -31,6 +36,19 @@ std::string GLFragmentDecompilerThread::compareFunction(COMPARE f, const std::st
|
|||
void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS)
|
||||
{
|
||||
OS << "#version 430\n";
|
||||
|
||||
if (device_props.has_native_half_support)
|
||||
{
|
||||
const auto driver_caps = gl::get_driver_caps();
|
||||
if (driver_caps.NV_gpu_shader5_supported)
|
||||
{
|
||||
OS << "#extension GL_NV_gpu_shader5: require\n";
|
||||
}
|
||||
else if (driver_caps.AMD_gpu_shader_half_float_supported)
|
||||
{
|
||||
OS << "#extension GL_AMD_gpu_shader_half_float: require\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GLFragmentDecompilerThread::insertInputs(std::stringstream & OS)
|
||||
|
@ -92,9 +110,10 @@ void GLFragmentDecompilerThread::insertOutputs(std::stringstream & OS)
|
|||
{ "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" },
|
||||
};
|
||||
|
||||
const auto reg_type = (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? "vec4" : getHalfTypeName(4);
|
||||
for (int i = 0; i < std::size(table); ++i)
|
||||
{
|
||||
if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second))
|
||||
if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second))
|
||||
OS << "layout(location=" << i << ") out vec4 " << table[i].first << ";\n";
|
||||
}
|
||||
}
|
||||
|
@ -206,14 +225,16 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
|||
};
|
||||
|
||||
std::string parameters = "";
|
||||
const auto half4 = getHalfTypeName(4);
|
||||
for (auto ®_name : output_values)
|
||||
{
|
||||
if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name))
|
||||
const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
|
||||
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
|
||||
{
|
||||
if (parameters.length())
|
||||
parameters += ", ";
|
||||
|
||||
parameters += "inout vec4 " + reg_name;
|
||||
parameters += "inout " + type + " " + reg_name;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -307,21 +328,24 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
|
|||
OS << "{\n";
|
||||
|
||||
std::string parameters = "";
|
||||
const auto half4 = getHalfTypeName(4);
|
||||
|
||||
for (auto ®_name : output_values)
|
||||
{
|
||||
if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name))
|
||||
const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
|
||||
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
|
||||
{
|
||||
if (parameters.length())
|
||||
parameters += ", ";
|
||||
|
||||
parameters += reg_name;
|
||||
OS << " vec4 " << reg_name << " = vec4(0.);\n";
|
||||
OS << " " << type << " " << reg_name << " = " << type << "(0.);\n";
|
||||
}
|
||||
}
|
||||
|
||||
OS << "\n" << " fs_main(" + parameters + ");\n\n";
|
||||
|
||||
glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS));
|
||||
glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS), device_props.has_native_half_support);
|
||||
|
||||
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
|
||||
{
|
||||
|
@ -359,7 +383,15 @@ void GLFragmentProgram::Decompile(const RSXFragmentProgram& prog)
|
|||
{
|
||||
u32 size;
|
||||
GLFragmentDecompilerThread decompiler(shader, parr, prog, size);
|
||||
|
||||
if (!g_cfg.video.disable_native_float16)
|
||||
{
|
||||
const auto driver_caps = gl::get_driver_caps();
|
||||
decompiler.device_props.has_native_half_support = driver_caps.NV_gpu_shader5_supported || driver_caps.AMD_gpu_shader_half_float_supported;
|
||||
}
|
||||
|
||||
decompiler.Task();
|
||||
|
||||
for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM])
|
||||
{
|
||||
for (const ParamItem& PI : PT.items)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue