rsx: Use native half float types if available

- Emulating f16 with f32 is not ideal and requires a lot of value clamping
- Using native data type can significantly improve performance and accuracy
- With OpenGL, check for the compatible extensions NV_gpu_shader5 and
AMD_gpu_shader_half_float
- With Vulkan, enable this functionality in the deviceFeatures if
applicable. (VK_KHR_shader_float16_int8 extension)
- Temporarily disable hw fp16 for Vulkan
This commit is contained in:
kd-11 2019-04-13 00:25:44 +03:00 committed by kd-11
parent ee319f7c13
commit a668560c68
14 changed files with 435 additions and 236 deletions

View file

@ -2,8 +2,8 @@
#include <set>
#include "Emu/Memory/vm.h"
#include "Emu/System.h"
#include "GLHelpers.h"
#include "GLFragmentProgram.h"
#include "../Common/ProgramStateCache.h"
#include "GLCommonDecompiler.h"
#include "../GCM.h"
@ -13,6 +13,11 @@ std::string GLFragmentDecompilerThread::getFloatTypeName(size_t elementCount)
return glsl::getFloatTypeNameImpl(elementCount);
}
// Returns the type name produced by glsl::getHalfTypeNameImpl for the given
// element count. This override only forwards the request; the actual name
// selection lives in the shared glsl helper.
std::string GLFragmentDecompilerThread::getHalfTypeName(size_t elementCount)
{
	std::string type_name = glsl::getHalfTypeNameImpl(elementCount);
	return type_name;
}
std::string GLFragmentDecompilerThread::getFunction(FUNCTION f)
{
return glsl::getFunctionImpl(f);
@ -31,6 +36,19 @@ std::string GLFragmentDecompilerThread::compareFunction(COMPARE f, const std::st
// Writes the shader preamble to OS: the "#version 430" line, followed by at
// most one "#extension ... : require" directive when
// device_props.has_native_half_support is set.
void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS)
{
	OS << "#version 430\n";

	// Without native half support no extension directive is emitted and the
	// driver capabilities are not queried at all.
	if (!device_props.has_native_half_support)
	{
		return;
	}

	const auto& caps = gl::get_driver_caps();

	// The NV extension takes precedence; the AMD one is only considered when
	// the NV one is not reported. If neither is reported, nothing is written.
	const char* extension_directive = nullptr;
	if (caps.NV_gpu_shader5_supported)
	{
		extension_directive = "#extension GL_NV_gpu_shader5: require\n";
	}
	else if (caps.AMD_gpu_shader_half_float_supported)
	{
		extension_directive = "#extension GL_AMD_gpu_shader_half_float: require\n";
	}

	if (extension_directive != nullptr)
	{
		OS << extension_directive;
	}
}
void GLFragmentDecompilerThread::insertInputs(std::stringstream & OS)
@ -92,9 +110,10 @@ void GLFragmentDecompilerThread::insertOutputs(std::stringstream & OS)
{ "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" },
};
const auto reg_type = (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? "vec4" : getHalfTypeName(4);
for (int i = 0; i < std::size(table); ++i)
{
if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second))
if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second))
OS << "layout(location=" << i << ") out vec4 " << table[i].first << ";\n";
}
}
@ -206,14 +225,16 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
};
std::string parameters = "";
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_values)
{
if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name))
const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
{
if (parameters.length())
parameters += ", ";
parameters += "inout vec4 " + reg_name;
parameters += "inout " + type + " " + reg_name;
}
}
@ -307,21 +328,24 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "{\n";
std::string parameters = "";
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_values)
{
if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name))
const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
{
if (parameters.length())
parameters += ", ";
parameters += reg_name;
OS << " vec4 " << reg_name << " = vec4(0.);\n";
OS << " " << type << " " << reg_name << " = " << type << "(0.);\n";
}
}
OS << "\n" << " fs_main(" + parameters + ");\n\n";
glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS));
glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS), device_props.has_native_half_support);
if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{
@ -359,7 +383,15 @@ void GLFragmentProgram::Decompile(const RSXFragmentProgram& prog)
{
u32 size;
GLFragmentDecompilerThread decompiler(shader, parr, prog, size);
if (!g_cfg.video.disable_native_float16)
{
const auto driver_caps = gl::get_driver_caps();
decompiler.device_props.has_native_half_support = driver_caps.NV_gpu_shader5_supported || driver_caps.AMD_gpu_shader_half_float_supported;
}
decompiler.Task();
for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM])
{
for (const ParamItem& PI : PT.items)