// Mirror of https://github.com/RPCS3/rpcs3.git (synced 2025-07-13 02:08:49 +12:00)
// 444 lines, 13 KiB, C++
#include "stdafx.h"
|
|
#include "GLShaderInterpreter.h"
|
|
#include "GLGSRender.h"
|
|
#include "GLVertexProgram.h"
|
|
#include "GLFragmentProgram.h"
|
|
#include "../Common/ShaderInterpreter.h"
|
|
#include "../Common/GLSLCommon.h"
|
|
|
|
namespace gl
|
|
{
|
|
using glsl::shader;
|
|
|
|
namespace interpreter
|
|
{
|
|
// Queries the GL implementation for the texture image unit limit that applies to
// the given program domain and stores the result in max_image_units.
//
// domain: shader stage whose limit is queried. Any value other than the vertex or
//         fragment domain is reported through rsx_log.fatal and then handled as if
//         it were the vertex domain, so pname is always initialised before use.
void texture_pool_allocator::create(::glsl::program_domain domain)
{
    GLenum pname;
    switch (domain)
    {
    default:
        rsx_log.fatal("Unexpected program domain %d", static_cast<int>(domain));
        [[fallthrough]]; // Report the problem, then fall back to the vertex limit
    case ::glsl::program_domain::glsl_vertex_program:
        pname = GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS;
        break;
    case ::glsl::program_domain::glsl_fragment_program:
        pname = GL_MAX_TEXTURE_IMAGE_UNITS;
        break;
    }

    glGetIntegerv(pname, &max_image_units);
}
|
|
|
|
void texture_pool_allocator::allocate(int size)
|
|
{
|
|
if ((used + size) > max_image_units)
|
|
{
|
|
rsx_log.fatal("Out of image binding slots!");
|
|
}
|
|
|
|
used += size;
|
|
texture_pool pool;
|
|
pool.pool_size = size;
|
|
pools.push_back(pool);
|
|
}
|
|
}
|
|
|
|
void shader_interpreter::create()
|
|
{
|
|
build_vs();
|
|
build_program(::program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES);
|
|
build_program(::program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES | ::program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT);
|
|
}
|
|
|
|
void shader_interpreter::destroy()
|
|
{
|
|
for (auto& prog : m_program_cache)
|
|
{
|
|
prog.second->fs.remove();
|
|
prog.second->prog.remove();
|
|
}
|
|
|
|
m_vs.remove();
|
|
}
|
|
|
|
// Selects the interpreter program variant that matches the current RSX register
// state and the supplied program metadata, building it if it is not cached yet.
//
// metadata: supplies the referenced-textures mask and the branch/pack instruction flags.
//
// Returns nullptr when alpha testing is enabled with the 'never' comparison function.
// Otherwise the selected variant becomes m_current_interpreter and a pointer to its
// program is returned.
glsl::program* shader_interpreter::get(const interpreter::program_metadata& metadata)
{
    namespace pci = program_common::interpreter;

    // Bitmask of compiler options identifying the variant
    u64 variant = 0;

    if (rsx::method_registers.alpha_test_enabled()) [[unlikely]]
    {
        switch (rsx::method_registers.alpha_func())
        {
        case rsx::comparison_function::never:
            return nullptr;
        case rsx::comparison_function::always:
            // Always passes; no alpha test option is needed
            break;
        case rsx::comparison_function::greater_or_equal:
            variant |= pci::COMPILER_OPT_ENABLE_ALPHA_TEST_GE;
            break;
        case rsx::comparison_function::greater:
            variant |= pci::COMPILER_OPT_ENABLE_ALPHA_TEST_G;
            break;
        case rsx::comparison_function::less_or_equal:
            variant |= pci::COMPILER_OPT_ENABLE_ALPHA_TEST_LE;
            break;
        case rsx::comparison_function::less:
            variant |= pci::COMPILER_OPT_ENABLE_ALPHA_TEST_L;
            break;
        case rsx::comparison_function::equal:
            variant |= pci::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ;
            break;
        case rsx::comparison_function::not_equal:
            variant |= pci::COMPILER_OPT_ENABLE_ALPHA_TEST_NE;
            break;
        }
    }

    // Options derived from the shader control register
    const auto shader_ctrl = rsx::method_registers.shader_control();
    if (shader_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
    {
        variant |= pci::COMPILER_OPT_ENABLE_DEPTH_EXPORT;
    }
    if (shader_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS)
    {
        variant |= pci::COMPILER_OPT_ENABLE_F32_EXPORT;
    }
    if (shader_ctrl & RSX_SHADER_CONTROL_USES_KIL)
    {
        variant |= pci::COMPILER_OPT_ENABLE_KIL;
    }

    // Options derived from the program metadata
    if (metadata.referenced_textures_mask)
    {
        variant |= pci::COMPILER_OPT_ENABLE_TEXTURES;
    }
    if (metadata.has_branch_instructions)
    {
        variant |= pci::COMPILER_OPT_ENABLE_FLOW_CTRL;
    }
    if (metadata.has_pack_instructions)
    {
        variant |= pci::COMPILER_OPT_ENABLE_PACKING;
    }

    // Reuse a cached variant when available, otherwise build and cache it now
    const auto cached = m_program_cache.find(variant);
    m_current_interpreter = (cached != m_program_cache.end())
        ? cached->second.get()
        : build_program(variant);

    return &m_current_interpreter->prog;
}
|
|
|
|
void shader_interpreter::build_vs()
|
|
{
|
|
::glsl::shader_properties properties{};
|
|
properties.domain = ::glsl::program_domain::glsl_vertex_program;
|
|
properties.require_lit_emulation = true;
|
|
|
|
// TODO: Extend decompiler thread
|
|
// TODO: Rename decompiler thread, it no longer spawns a thread
|
|
RSXVertexProgram null_prog;
|
|
std::string shader_str;
|
|
ParamArray arr;
|
|
GLVertexDecompilerThread comp(null_prog, shader_str, arr);
|
|
|
|
std::stringstream builder;
|
|
comp.insertHeader(builder);
|
|
|
|
builder << "#define Z_NEGATIVE_ONE_TO_ONE\n\n";
|
|
|
|
comp.insertConstants(builder, {});
|
|
comp.insertInputs(builder, {});
|
|
|
|
// Insert vp stream input
|
|
builder << "\n"
|
|
"layout(std140, binding = " << GL_INTERPRETER_VERTEX_BLOCK << ") readonly restrict buffer VertexInstructionBlock\n"
|
|
"{\n"
|
|
" uint base_address;\n"
|
|
" uint entry;\n"
|
|
" uint output_mask;\n"
|
|
" uint control;\n"
|
|
" uvec4 vp_instructions[];\n"
|
|
"};\n\n";
|
|
|
|
::glsl::insert_glsl_legacy_function(builder, properties);
|
|
::glsl::insert_vertex_input_fetch(builder, ::glsl::glsl_rules::glsl_rules_opengl4);
|
|
|
|
builder << program_common::interpreter::get_vertex_interpreter();
|
|
const std::string s = builder.str();
|
|
|
|
m_vs.create(::glsl::program_domain::glsl_vertex_program, s);
|
|
m_vs.compile();
|
|
}
|
|
|
|
// Assembles the GLSL source of the interpreter fragment shader for one option set
// and compiles it into prog_data.fs.
//
// compiler_options: bitmask of program_common::interpreter::COMPILER_OPT_* flags.
//                   Each flag maps to a preprocessor define emitted ahead of the
//                   interpreter body.
// prog_data:        receives the compiled fragment shader; when textures are enabled
//                   its allocator is also populated with one pool per sampler type
//                   (1D, 2D, CUBE, 3D, in that order).
void shader_interpreter::build_fs(u64 compiler_options, interpreter::cached_program& prog_data)
{
    namespace pci = program_common::interpreter;

    const auto has_option = [compiler_options](u64 mask)
    {
        return (compiler_options & mask) != 0;
    };

    const bool with_textures = has_option(pci::COMPILER_OPT_ENABLE_TEXTURES);

    // Allocate TIUs
    auto& allocator = prog_data.allocator;
    if (with_textures)
    {
        allocator.create(::glsl::program_domain::glsl_fragment_program);

        // Pool sizes per sampler type, keyed by the minimum number of image units
        // required. Rows are ordered from most to least generous; the first row
        // that fits is used.
        struct tiu_layout
        {
            int min_units;
            int count_1d;
            int count_2d;
            int count_cube;
            int count_3d;
        };

        const tiu_layout layouts[] =
        {
            { 32, 4, 16, 4, 4 },
            { 24, 2, 16, 2, 4 },
            { 16, 2, 10, 2, 2 },
        };

        const tiu_layout* chosen = nullptr;
        for (const auto& candidate : layouts)
        {
            if (allocator.max_image_units >= candidate.min_units)
            {
                chosen = &candidate;
                break;
            }
        }

        if (chosen)
        {
            allocator.allocate(chosen->count_1d);   // 1D
            allocator.allocate(chosen->count_2d);   // 2D
            allocator.allocate(chosen->count_cube); // CUBE
            allocator.allocate(chosen->count_3d);   // 3D
        }
        else
        {
            // Unusable
            rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
        }
    }

    // NOTE: these properties are filled in but not consumed anywhere in this function
    ::glsl::shader_properties properties{};
    properties.domain = ::glsl::program_domain::glsl_fragment_program;
    properties.require_depth_conversion = true;
    properties.require_wpos = true;

    u32 len;
    ParamArray params;
    std::string unused_source;
    RSXFragmentProgram empty_program;
    GLFragmentDecompilerThread decompiler(unused_source, params, empty_program, len);

    std::stringstream source;
    source <<
        "#version 450\n"
        "#extension GL_ARB_bindless_texture : require\n\n";

    ::glsl::insert_subheader_block(source);
    decompiler.insertConstants(source);

    const auto define_if = [&source](bool condition, const char* text)
    {
        if (condition)
        {
            source << text;
        }
    };

    // Alpha test variants
    define_if(has_option(pci::COMPILER_OPT_ENABLE_ALPHA_TEST_GE), "#define ALPHA_TEST_GEQUAL\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_ALPHA_TEST_G), "#define ALPHA_TEST_GREATER\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_ALPHA_TEST_LE), "#define ALPHA_TEST_LEQUAL\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_ALPHA_TEST_L), "#define ALPHA_TEST_LESS\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ), "#define ALPHA_TEST_EQUAL\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_ALPHA_TEST_NE), "#define ALPHA_TEST_NEQUAL\n");

    // Half-precision output is selected when the F32 export option is absent
    define_if(!has_option(pci::COMPILER_OPT_ENABLE_F32_EXPORT), "#define WITH_HALF_OUTPUT_REGISTER\n");

    // Remaining feature toggles
    define_if(has_option(pci::COMPILER_OPT_ENABLE_DEPTH_EXPORT), "#define WITH_DEPTH_EXPORT\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_FLOW_CTRL), "#define WITH_FLOW_CTRL\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_PACKING), "#define WITH_PACKING\n");
    define_if(has_option(pci::COMPILER_OPT_ENABLE_KIL), "#define WITH_KIL\n");

    if (with_textures)
    {
        source << "#define WITH_TEXTURES\n\n";

        // One sampler array per pool, sized to that pool's capacity
        const char* sampler_types[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
        int pool_index = 0;
        for (const char* sampler_type : sampler_types)
        {
            source << "uniform " << sampler_type << " " << sampler_type << "_array[" << allocator.pools[pool_index].pool_size << "];\n";
            ++pool_index;
        }

        source << "\n"
            "#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
            "#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
            "#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
            "#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
            "#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
    }
    else if (compiler_options)
    {
        source << "\n";
    }

    // Storage block carrying the fragment program state and instruction stream
    source <<
        "layout(std430, binding =" << GL_INTERPRETER_FRAGMENT_BLOCK << ") readonly restrict buffer FragmentInstructionBlock\n"
        "{\n"
        " uint shader_control;\n"
        " uint texture_control;\n"
        " uint reserved1;\n"
        " uint reserved2;\n"
        " uint texture_handles[16];\n"
        " uvec4 fp_instructions[];\n"
        "};\n\n";

    source << program_common::interpreter::get_fragment_interpreter();

    prog_data.fs.create(::glsl::program_domain::glsl_fragment_program, source.str());
    prog_data.fs.compile();
}
|
|
|
|
// Builds, links and caches the interpreter program for one option set.
//
// compiler_options: bitmask of program_common::interpreter::COMPILER_OPT_* flags;
//                   also used as the key into m_program_cache.
//
// Returns a non-owning pointer to the cached entry. Ownership stays with
// m_program_cache; an existing entry under the same key is replaced.
//
// The new entry is held by a smart pointer from the moment it is allocated, so it
// is released if shader generation or linking exits early via an exception,
// instead of leaking as a raw `new` would.
// std::make_unique needs no new include: <memory> is already in scope, since this
// file names std::unique_ptr.
interpreter::cached_program* shader_interpreter::build_program(u64 compiler_options)
{
    auto data = std::make_unique<interpreter::cached_program>();
    build_fs(compiler_options, *data);

    data->prog.create().
        attach(m_vs).
        attach(data->fs).
        link();

    // Bind the first two uniform locations to consecutive stream buffer slots
    data->prog.uniforms[0] = GL_STREAM_BUFFER_START + 0;
    data->prog.uniforms[1] = GL_STREAM_BUFFER_START + 1;

    if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES)
    {
        // Initialize texture bindings
        int assigned = 0;
        auto& allocator = data->allocator;
        const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };

        for (int i = 0; i < 4; ++i)
        {
            // Give every slot of every pool a unique, consecutive value
            for (int j = 0; j < allocator.pools[i].pool_size; ++j)
            {
                allocator.pools[i].allocate(assigned++);
            }

            data->prog.uniforms[type_names[i]] = allocator.pools[i].allocated;
        }
    }

    // Take the raw pointer before ownership moves into the cache
    interpreter::cached_program* result = data.get();
    m_program_cache[compiler_options] = std::move(data);
    return result;
}
|
|
|
|
// Reports whether `program` is the program object of the currently selected
// interpreter variant.
//
// Returns false when no interpreter variant has been selected yet. Without this
// check, the member address would be formed through a null m_current_interpreter;
// update_fragment_textures already treats that pointer as possibly null.
bool shader_interpreter::is_interpreter(const glsl::program* program)
{
    if (!m_current_interpreter)
    {
        return false;
    }

    return (program == &m_current_interpreter->prog);
}
|
|
|
|
void shader_interpreter::update_fragment_textures(
|
|
const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors,
|
|
u16 reference_mask, u32* out)
|
|
{
|
|
if (reference_mask == 0 || !m_current_interpreter)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Reset allocation
|
|
auto& allocator = m_current_interpreter->allocator;
|
|
for (unsigned i = 0; i < 4; ++i)
|
|
{
|
|
allocator.pools[i].num_used = 0;
|
|
allocator.pools[i].flags = 0;
|
|
}
|
|
|
|
rsx::simple_array<std::pair<int, int>> replacement_map;
|
|
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
|
{
|
|
if (reference_mask & (1 << i))
|
|
{
|
|
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(descriptors[i].get());
|
|
verify(HERE), sampler_state;
|
|
|
|
int pool_id = static_cast<int>(sampler_state->image_type);
|
|
auto& pool = allocator.pools[pool_id];
|
|
|
|
const int old = pool.allocated[pool.num_used];
|
|
if (!pool.allocate(i))
|
|
{
|
|
rsx_log.error("Could not allocate texture resource for shader interpreter.");
|
|
break;
|
|
}
|
|
|
|
out[i] = (pool.num_used - 1);
|
|
if (old != i)
|
|
{
|
|
// Check if the candidate target has also been replaced
|
|
bool found = false;
|
|
for (auto& e : replacement_map)
|
|
{
|
|
if (e.second == old)
|
|
{
|
|
// This replacement consumed this 'old' value
|
|
e.second = i;
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!found)
|
|
{
|
|
replacement_map.push_back({ old, i });
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
out[i] = 0xFF;
|
|
}
|
|
}
|
|
|
|
// Bind TIU locations
|
|
if (replacement_map.empty()) [[likely]]
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Overlapping texture bindings are trouble. Cannot bind one TIU to two types of samplers simultaneously
|
|
for (unsigned i = 0; i < replacement_map.size(); ++i)
|
|
{
|
|
for (int j = 0; j < 4; ++j)
|
|
{
|
|
auto& pool = allocator.pools[j];
|
|
for (int k = pool.num_used; k < pool.pool_size; ++k)
|
|
{
|
|
if (pool.allocated[k] == replacement_map[i].second)
|
|
{
|
|
pool.allocated[k] = replacement_map[i].first;
|
|
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
|
|
|
|
// Exit nested loop
|
|
j = 4;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (allocator.pools[0].flags) m_current_interpreter->prog.uniforms["sampler1D_array"] = allocator.pools[0].allocated;
|
|
if (allocator.pools[1].flags) m_current_interpreter->prog.uniforms["sampler2D_array"] = allocator.pools[1].allocated;
|
|
if (allocator.pools[2].flags) m_current_interpreter->prog.uniforms["samplerCube_array"] = allocator.pools[2].allocated;
|
|
if (allocator.pools[3].flags) m_current_interpreter->prog.uniforms["sampler3D_array"] = allocator.pools[3].allocated;
|
|
}
|
|
}
|