rpcs3/rpcs3/Emu/RSX/Program/FragmentProgramRegister.cpp
kd-11 dce0abc8b9 rsx/fp: Re-design register write tracking
- Always collapse register writes when exporting FS outputs
2025-01-10 04:34:28 +03:00

196 lines
5.1 KiB
C++

#include "stdafx.h"
#include "FragmentProgramRegister.h"
namespace rsx
{
// Starts out with every slot of the content mask flagged as undefined,
// i.e. nothing has been tagged as written to this register yet.
MixedPrecisionRegister::MixedPrecisionRegister()
{
	for (auto& slot : content_mask)
	{
		slot = data_type_bits::undefined;
	}
}
// Flags the selected components of the even (first) half register as f16.
// Components x, y, z, w map onto content mask slots 0, 1, 2, 3 respectively.
void MixedPrecisionRegister::tag_h0(bool x, bool y, bool z, bool w)
{
	const bool selected[4] = { x, y, z, w };

	for (int lane = 0; lane < 4; ++lane)
	{
		if (selected[lane])
		{
			content_mask[lane] = data_type_bits::f16;
		}
	}
}
// Flags the selected components of the odd (second) half register as f16.
// Components x, y, z, w map onto content mask slots 4, 5, 6, 7 respectively.
void MixedPrecisionRegister::tag_h1(bool x, bool y, bool z, bool w)
{
	constexpr int first_slot = 4;
	const bool selected[4] = { x, y, z, w };

	for (int lane = 0; lane < 4; ++lane)
	{
		if (selected[lane])
		{
			content_mask[first_slot + lane] = data_type_bits::f16;
		}
	}
}
// Flags the selected components of the full-width register as f32.
// Each component spans two consecutive content mask slots:
// x -> [0, 1], y -> [2, 3], z -> [4, 5], w -> [6, 7].
void MixedPrecisionRegister::tag_r(bool x, bool y, bool z, bool w)
{
	const bool selected[4] = { x, y, z, w };

	for (int lane = 0; lane < 4; ++lane)
	{
		if (!selected[lane])
		{
			continue;
		}

		const int first_slot = lane * 2;
		content_mask[first_slot + 1] = data_type_bits::f32;
		content_mask[first_slot] = data_type_bits::f32;
	}
}
// Records a write to this register.
//   index   - register number as seen by the caller; for fp16 writes this is the
//             half-register number (two half registers share one file slot).
//   is_fp16 - true when the write targets a half register, false for a full register.
//   x..w    - which components are written.
// The first call binds this object to a file index; later fp16 writes are checked
// against it. Even half-register numbers go to tag_h0, odd ones to tag_h1.
void MixedPrecisionRegister::tag(u32 index, bool is_fp16, bool x, bool y, bool z, bool w)
{
	if (file_index == umax)
	{
		// Not bound to a register file slot yet; adopt the one implied by this write.
		file_index = is_fp16 ? (index >> 1) : index;
	}

	if (!is_fp16)
	{
		tag_r(x, y, z, w);
		return;
	}

	// A half register must belong to the slot this object is already bound to.
	ensure((index / 2) == file_index);

	const bool is_upper_half = (index & 1) != 0;
	if (is_upper_half)
	{
		tag_h1(x, y, z, w);
	}
	else
	{
		tag_h0(x, y, z, w);
	}
}
std::string MixedPrecisionRegister::gather_r() const
{
const auto half_index = file_index << 1;
const std::string reg = "r" + std::to_string(file_index);
const std::string gather_half_regs[] = {
"gather(h" + std::to_string(half_index) + ")",
"gather(h" + std::to_string(half_index + 1) + ")"
};
std::string outputs[4];
for (int ch = 0; ch < 4; ++ch)
{
// FIXME: This approach ignores mixed register bits. Not ideal!!!!
const auto channel0 = content_mask[ch * 2];
const auto is_fp16_ch = channel0 == content_mask[ch * 2 + 1] && channel0 == data_type_bits::f16;
outputs[ch] = is_fp16_ch ? gather_half_regs[ch / 2] : reg;
}
// Grouping. Only replace relevant bits...
if (outputs[0] == outputs[1]) outputs[0] = "";
if (outputs[2] == outputs[3]) outputs[2] = "";
// Assemble
bool group = false;
std::string result = "";
constexpr std::string_view swz_mask = "xyzw";
for (int ch = 0; ch < 4; ++ch)
{
if (outputs[ch].empty())
{
group = true;
continue;
}
if (!result.empty())
{
result += ", ";
}
if (group)
{
ensure(ch > 0);
group = false;
if (outputs[ch] == reg)
{
result += reg + "." + swz_mask[ch - 1] + swz_mask[ch];
continue;
}
result += outputs[ch];
continue;
}
const int subch = outputs[ch] == reg ? ch : (ch % 2); // Avoid .xyxy.z and other such ugly swizzles
result += outputs[ch] + "." + swz_mask[subch];
}
// Optimize dual-gather (128-bit gather) to use special function
const std::string double_gather = gather_half_regs[0] + ", " + gather_half_regs[1];
if (result == double_gather)
{
result = "gather(h" + std::to_string(half_index) + ", h" + std::to_string(half_index + 1) + ")";
}
return "(" + result + ")";
}
// Builds a parenthesised, comma-separated component list that reconstructs one
// half register (H16x4) of this file slot.
//   word_index - which half register to read: 0 -> h<file_index * 2>,
//                1 -> h<file_index * 2 + 1>. Any other value fails the ensure().
// Components whose content mask slot is tagged f32 are read out of the full
// register "r<file_index>" through unpackHalf2x16(floatBitsToUint(...));
// all other components are read from the half register itself.
std::string MixedPrecisionRegister::fetch_halfreg(u32 word_index) const
{
	// Validate first: word_index is used to index swz_mask below, and indexing a
	// string_view out of range is undefined behaviour.
	ensure(word_index <= 1);

	constexpr std::string_view swz_mask = "xyzw";
	const std::string reg = "r" + std::to_string(file_index);
	const std::string hreg = "h" + std::to_string(file_index * 2 + word_index);

	// Half register 0 overlaps r.xy, half register 1 overlaps r.zw.
	const std::string word0_bits = "floatBitsToUint(" + reg + "." + swz_mask[word_index * 2] + ")";
	const std::string word1_bits = "floatBitsToUint(" + reg + "." + swz_mask[word_index * 2 + 1] + ")";
	const std::string words[] = {
		"unpackHalf2x16(" + word0_bits + ")",
		"unpackHalf2x16(" + word1_bits + ")"
	};

	// Pick a source expression for each of the four half components.
	std::string outputs[4];
	const int word_offset = word_index * 4;
	for (int ch = 0; ch < 4; ++ch)
	{
		outputs[ch] = content_mask[ch + word_offset] == data_type_bits::f32
			? words[ch / 2]
			: hreg;
	}

	// Grouping. Only replace relevant bits...
	// An emptied entry means "same source as the next component"; the pair is
	// then emitted as a single two-component expression.
	if (outputs[0] == outputs[1]) outputs[0] = "";
	if (outputs[2] == outputs[3]) outputs[2] = "";

	// Assemble
	bool group = false;
	std::string result = "";
	for (int ch = 0; ch < 4; ++ch)
	{
		if (outputs[ch].empty())
		{
			group = true;
			continue;
		}

		if (!result.empty())
		{
			result += ", ";
		}

		if (group)
		{
			ensure(ch > 0);
			group = false;
			result += outputs[ch];

			// The unpack expression is emitted as-is; the half register needs
			// an explicit two-component swizzle.
			if (outputs[ch] == hreg)
			{
				result += std::string(".") + swz_mask[ch - 1] + swz_mask[ch];
			}
			continue;
		}

		const int subch = outputs[ch] == hreg ? ch : (ch % 2); // Avoid .xyxy.z and other such ugly swizzles
		result += outputs[ch] + "." + swz_mask[subch];
	}

	return "(" + result + ")";
}
}