rsx/fp: Re-design register write tracking

- Always collapse register writes when exporting FS outputs
This commit is contained in:
kd-11 2025-01-04 01:57:03 +03:00 committed by kd-11
parent 88e13d8326
commit dce0abc8b9
9 changed files with 400 additions and 130 deletions

View file

@ -0,0 +1,111 @@
#pragma once
#include <util/types.hpp>
namespace rsx
{
class MixedPrecisionRegister
{
enum data_type_bits
{
undefined = 0,
f16 = 1,
f32 = 2
};
std::array<data_type_bits, 8> content_mask; // Content details for each half-word
u32 file_index = umax;
void tag_h0(bool x, bool y, bool z, bool w);
void tag_h1(bool x, bool y, bool z, bool w);
void tag_r(bool x, bool y, bool z, bool w);
std::string fetch_halfreg(u32 word_index) const;
public:
MixedPrecisionRegister();
void tag(u32 index, bool is_fp16, bool x, bool y, bool z, bool w);
std::string gather_r() const;
std::string split_h0() const
{
return fetch_halfreg(0);
}
std::string split_h1() const
{
return fetch_halfreg(1);
}
// Getters
// Return true if all values are unwritten to (undefined)
bool floating() const
{
return file_index == umax;
}
// Return true if the first half register is all undefined
bool floating_h0() const
{
return content_mask[0] == content_mask[1] &&
content_mask[1] == content_mask[2] &&
content_mask[2] == content_mask[3] &&
content_mask[3] == data_type_bits::undefined;
}
// Return true if the second half register is all undefined
bool floating_h1() const
{
return content_mask[4] == content_mask[5] &&
content_mask[5] == content_mask[6] &&
content_mask[6] == content_mask[7] &&
content_mask[7] == data_type_bits::undefined;
}
// Return true if any of the half-words are 16-bit
bool requires_gather(u8 channel) const
{
// Data fetched from the single precision register requires merging of the two half registers
const auto channel_offset = channel * 2;
ensure(channel_offset <= 6);
return (content_mask[channel_offset] == data_type_bits::f16 || content_mask[channel_offset + 1] == data_type_bits::f16);
}
// Return true if the entire 128-bit register is filled with 2xfp16x4 data words
bool requires_gather128() const
{
// Full 128-bit check
for (const auto& ch : content_mask)
{
if (ch == data_type_bits::f16)
{
return true;
}
}
return false;
}
// Return true if the half-register is polluted with fp32 data
bool requires_split(u32 word_index) const
{
const u32 content_offset = word_index * 4;
for (u32 i = 0; i < 4; ++i)
{
if (content_mask[content_offset + i] == data_type_bits::f32)
{
return true;
}
}
return false;
}
};
}