mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-02 13:01:27 +12:00
SPU Analyzer: Detect RdDec pseudo-reads loops
This commit is contained in:
parent
0724fa3aa9
commit
5f017fb460
2 changed files with 425 additions and 129 deletions
|
@ -69,6 +69,59 @@ struct span_less
|
|||
template <typename T>
|
||||
inline constexpr span_less<T> s_span_less{};
|
||||
|
||||
// Hand-managed storage for exactly one T.
// Default-initializing the wrapper leaves the bytes untouched, value-initializing it zero-fills them.
// Nothing is constructed or destroyed automatically: call init() before the first access and
// destroy() whenever T needs its destructor to run.
template <typename T>
struct alignas(T) un_t
{
	std::byte data[sizeof(T)];

	// Address of the stored object
	T* get() noexcept
	{
		T* const obj = reinterpret_cast<T*>(+data);
		return std::launder(obj);
	}

	// Address of the stored object (read-only)
	const T* get() const noexcept
	{
		const T* const obj = reinterpret_cast<const T*>(+data);
		return std::launder(obj);
	}

	// Copy-assign into the stored object
	T& operator=(const T& r) noexcept
	{
		T* const obj = get();
		return *obj = r;
	}

	// Member access forwards to the stored object
	T* operator->() noexcept
	{
		T* const obj = get();
		return obj;
	}

	const T* operator->() const noexcept
	{
		const T* const obj = get();
		return obj;
	}

	// Implicit views of the stored object
	operator T&() noexcept
	{
		T* const obj = get();
		return *obj;
	}

	operator const T&() const noexcept
	{
		const T* const obj = get();
		return *obj;
	}

	// Value-initialize a T inside the storage of 'un'
	static void init(un_t& un)
	{
		auto* const where = un.data;
		new (where) T();
	}

	// Run the destructor of the stored object (the storage itself stays in place)
	void destroy()
	{
		T* const obj = get();
		obj->~T();
	}
};
|
||||
|
||||
|
||||
// Move 4 args for calling native function from a GHC calling convention function
|
||||
#if defined(ARCH_X64)
|
||||
static u8* move_args_ghc_to_native(u8* raw)
|
||||
|
@ -2557,13 +2610,13 @@ using vf = spu_recompiler_base::vf;
|
|||
|
||||
bool reg_state_t::is_const() const
|
||||
{
|
||||
return !!(flag & vf::is_const);
|
||||
return !(flag & vf::is_mask) && bit_range == 0 && value_range == 1;
|
||||
}
|
||||
|
||||
bool reg_state_t::compare_tags(const reg_state_t& rhs) const
|
||||
{
|
||||
// Compare by tag, address of instruction origin
|
||||
return tag == rhs.tag && origin == rhs.origin && is_instruction == rhs.is_instruction;
|
||||
// Compare by tag, address of instruction origin
|
||||
return value == rhs.value && tag == rhs.tag && origin == rhs.origin && is_instruction == rhs.is_instruction;
|
||||
}
|
||||
|
||||
bool reg_state_t::operator&(vf to_test) const
|
||||
|
@ -2578,7 +2631,7 @@ bool reg_state_t::is_less_than(u32 imm) const
|
|||
return true;
|
||||
}
|
||||
|
||||
if (~known_zeroes < imm)
|
||||
if (~get_known_zeroes() < imm)
|
||||
{
|
||||
// The highest number possible within the mask's limit is less than imm
|
||||
return true;
|
||||
|
@ -2589,12 +2642,12 @@ bool reg_state_t::is_less_than(u32 imm) const
|
|||
|
||||
bool reg_state_t::operator==(const reg_state_t& r) const
|
||||
{
|
||||
if ((flag ^ r.flag) - (vf::is_null + vf::is_mask))
|
||||
if ((flag ^ r.flag) - (vf::is_null))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return (flag & vf::is_const ? value == r.value : (compare_tags(r) && known_ones == r.known_ones && known_zeroes == r.known_zeroes));
|
||||
// is_const() is a member of reg_state_t itself; the flag set has no such query
return (is_const() ? value == r.value : (compare_tags(r) && bit_range == r.bit_range && value_range == r.value_range));
|
||||
}
|
||||
|
||||
bool reg_state_t::operator==(u32 imm) const
|
||||
|
@ -2610,7 +2663,7 @@ bool reg_state_t::compare_with_mask_indifference(const reg_state_t& r, u32 mask_
|
|||
return true;
|
||||
}
|
||||
|
||||
if ((r.flag & flag) & vf::is_const)
|
||||
// The simplified path compares raw values, which is only meaningful when BOTH sides are constants
if (r.is_const() && is_const())
|
||||
{
|
||||
// Simplified path for consts
|
||||
if (((value ^ r.value) & mask_bits) == 0)
|
||||
|
@ -2628,8 +2681,8 @@ bool reg_state_t::compare_with_mask_indifference(const reg_state_t& r, u32 mask_
|
|||
return true;
|
||||
}
|
||||
|
||||
const auto _this = this->downgrade();
|
||||
const auto _r = r.downgrade();
|
||||
const auto& _this = *this;
|
||||
const auto& _r = r;
|
||||
|
||||
const bool is_mask_equal = (_this.compare_tags(_r) && _this.flag == _r.flag && !((_this.known_ones ^ _r.known_ones) & mask_bits) && !((_this.known_zeroes ^ _r.known_zeroes) & mask_bits));
|
||||
|
||||
|
@ -2643,7 +2696,7 @@ bool reg_state_t::compare_with_mask_indifference(u32 imm, u32 mask_bits) const
|
|||
return true;
|
||||
}
|
||||
|
||||
if (flag & vf::is_const)
|
||||
if (is_const())
|
||||
{
|
||||
if (((value ^ imm) & mask_bits) == 0)
|
||||
{
|
||||
|
@ -2662,7 +2715,7 @@ bool reg_state_t::unequal_with_mask_indifference(const reg_state_t& r, u32 mask_
|
|||
return true;
|
||||
}
|
||||
|
||||
if ((r.flag & flag) & vf::is_const)
|
||||
if (r.is_const() && is_const())
|
||||
{
|
||||
// Simplified path for consts
|
||||
if ((value ^ r.value) & mask_bits)
|
||||
|
@ -2696,21 +2749,6 @@ bool reg_state_t::unequal_with_mask_indifference(const reg_state_t& r, u32 mask_
|
|||
return (((_this.known_ones ^ _r.known_ones) & mask_bits) & ((_this.known_zeroes ^ _r.known_zeroes) & mask_bits)) != 0;
|
||||
}
|
||||
|
||||
reg_state_t reg_state_t::downgrade() const
|
||||
{
|
||||
if (flag & vf::is_const)
|
||||
{
|
||||
return reg_state_t{vf::is_mask, 0, umax, this->value, ~this->value, this->origin};
|
||||
}
|
||||
|
||||
if (!(flag - vf::is_null))
|
||||
{
|
||||
return reg_state_t{vf::is_mask, 0, this->tag, 0, 0, this->origin};
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
reg_state_t reg_state_t::merge(const reg_state_t& rhs, u32 current_pc) const
|
||||
{
|
||||
if (rhs == *this)
|
||||
|
@ -2719,18 +2757,17 @@ reg_state_t reg_state_t::merge(const reg_state_t& rhs, u32 current_pc) const
|
|||
return rhs;
|
||||
}
|
||||
|
||||
if ((rhs.flag + flag).all_of(vf::is_const + vf::is_mask))
|
||||
if ((rhs.bit_range | bit_range) != umax)
|
||||
{
|
||||
// Try to downgrade to a known-bits type value
|
||||
const reg_state_t _rhs = rhs.downgrade();
|
||||
const reg_state_t _this = this->downgrade();
|
||||
const reg_state_t& _rhs = rhs;
|
||||
const reg_state_t& _this = *this;
|
||||
|
||||
if ((_rhs.flag & _this.flag) & vf::is_mask)
|
||||
{
|
||||
// Now it is possible to merge the two values
|
||||
reg_state_t res{vf::is_mask, 0, 0, _rhs.known_ones & _this.known_ones, _rhs.known_zeroes & _this.known_zeroes};
|
||||
reg_state_t res{{}, 0, 0, _rhs.known_ones & _this.known_ones, _rhs.known_zeroes & _this.known_zeroes};
|
||||
|
||||
if (res.known_zeroes | res.known_ones)
|
||||
if (res.bit_range != umax)
|
||||
{
|
||||
// Success (create new value tag)
|
||||
res.tag = reg_state_t::alloc_tag();
|
||||
|
@ -2763,23 +2800,13 @@ reg_state_t reg_state_t::build_on_top_of(const reg_state_t& rhs) const
|
|||
|
||||
u32 reg_state_t::get_known_zeroes() const
|
||||
{
|
||||
if (flag & vf::is_const)
|
||||
{
|
||||
return ~value;
|
||||
}
|
||||
|
||||
return known_zeroes;
|
||||
return (~value) & ~bit_range;
|
||||
}
|
||||
|
||||
|
||||
u32 reg_state_t::get_known_ones() const
|
||||
{
|
||||
if (flag & vf::is_const)
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
return known_ones;
|
||||
return value & ~bit_range;
|
||||
}
|
||||
|
||||
reg_state_t reg_state_t::from_value(u32 value) noexcept
|
||||
|
@ -3009,7 +3036,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
|
||||
// Analyse instruction
|
||||
switch (const auto type = g_spu_itype.decode(data))
|
||||
const auto type = g_spu_itype.decode(data);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case spu_itype::UNK:
|
||||
case spu_itype::DFCEQ:
|
||||
|
@ -4248,7 +4277,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
{
|
||||
if (tb.chunk == block.chunk && tb.reg_origin[i] + 1)
|
||||
{
|
||||
const u32 expected = block.reg_mod[i] ? addr : block.reg_origin[i];
|
||||
const u32 expected = block.reg_origin[i] == umax || block.reg_mod[i] ? addr : block.reg_origin[i];
|
||||
|
||||
if (tb.reg_origin[i] == 0x80000000)
|
||||
{
|
||||
|
@ -4838,19 +4867,148 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
return map;
|
||||
};
|
||||
|
||||
struct putllc16_statistics_t
|
||||
struct break_stats_t
|
||||
{
|
||||
atomic_t<u64> all = 0;
|
||||
atomic_t<u64> single = 0;
|
||||
atomic_t<u64> nowrite = 0;
|
||||
std::array<atomic_t<u64>, 128> breaking_reason{};
|
||||
};
|
||||
|
||||
struct rchcnt_statistics_t
|
||||
struct putllc16_statistics_t : break_stats_t
|
||||
{
|
||||
atomic_t<u64> all = 0;
|
||||
atomic_t<u64> single = 0;
|
||||
std::array<atomic_t<u64>, 128> breaking_reason{};
|
||||
atomic_t<u64> nowrite = 0;
|
||||
};
|
||||
|
||||
struct rchcnt_statistics_t : break_stats_t
|
||||
{
|
||||
};
|
||||
|
||||
struct rddec_statistics_t : break_stats_t
|
||||
{
|
||||
};
|
||||
|
||||
// Compact bit set addressed by index (bit 'index % 8' of byte 'index / 8').
// NOTE(review): the storage holds s_reg_max / 8 bytes; if s_reg_max is not a multiple of 8 the
// highest indices do not fit - confirm against the definition of s_reg_max.
struct alignas(u8) masked_regs_t
{
	u8 mask[s_reg_max / 8]{};

	// Empty set (all bits cleared)
	masked_regs_t() noexcept = default;

	// Full set: every bit is set (the argument only selects this overload, its value is ignored)
	explicit masked_regs_t(int) noexcept
	{
		for (u8& bits : mask)
		{
			bits = umax;
		}
	}

	// True if the bit for 'index' is set
	bool test(usz index) const noexcept
	{
		return (mask[index / 8] >> (index % 8)) % 2 != 0;
	}

	// Set the bit for 'index' and return whether it had ALREADY been set before this call
	bool test_and_set(usz index) noexcept
	{
		const u8 bit = static_cast<u8>(1u << (index % 8));
		u8& slot = mask[index / 8];

		// Sample the previous state before modifying the byte
		const bool was_set = (slot & bit) != 0;
		slot |= bit;
		return was_set;
	}

	// Union of both sets; neither operand is modified
	masked_regs_t join(const masked_regs_t& rhs) const noexcept
	{
		masked_regs_t result = *this;

		for (u32 i = 0; i < s_reg_max / 8; i++)
		{
			result.mask[i] |= rhs.mask[i];
		}

		return result;
	}

	// Reset to the empty set
	void init()
	{
		*this = {};
	}
};
|
||||
|
||||
struct masked_reg_state_t : masked_regs_t
|
||||
{
|
||||
std::array<un_t<reg_state_t>, s_reg_max> state;
|
||||
|
||||
reg_state_t& get(usz index) noexcept
|
||||
{
|
||||
if (!test_and_set(index))
|
||||
{
|
||||
state[index]->init();
|
||||
}
|
||||
|
||||
return state[index];
|
||||
}
|
||||
|
||||
const reg_state_t& get(usz index) const noexcept
|
||||
{
|
||||
ensure(test(index));
|
||||
return state[index];
|
||||
}
|
||||
|
||||
reg_state_t& get(usz index) noexcept
|
||||
{
|
||||
ensure(test(index));
|
||||
return state[index];
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
auto compare_known(const masked_reg_state_t& rhs, Func& func)
|
||||
{
|
||||
for (int i = 0; i < s_reg_mask; i++)
|
||||
{
|
||||
if (test(i) && rhs.test(i))
|
||||
{
|
||||
const reg_state_t& a = get(i);
|
||||
const reg_state_t& b = rhs.get(i);
|
||||
|
||||
if (func(a, b))
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct masked_reg_origin_t : masked_regs_t
|
||||
{
|
||||
usz count_tracked = 0;
|
||||
u8 tracked_names[32]{};
|
||||
std::array<un_t<masked_regs_t>, 32> tracked_regs;
|
||||
masked_regs_t reg_unk{1};
|
||||
|
||||
masked_regs_t& get(u8 index) const noexcept
|
||||
{
|
||||
for (u32 i = 0; i < count_tracked; i++)
|
||||
{
|
||||
if (tracked_names[i] == index)
|
||||
{
|
||||
return *tracked_regs[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (count_tracked < std::size(tracked_names))
|
||||
{
|
||||
auto& ptr = tracked_regs[count_tracked++];
|
||||
ptr->init();
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
|
||||
return reg_unk;
|
||||
// // const v128 p1 = v128::loadu(tracked_regs, 0);
|
||||
// // const v128 p2 = v128::loadu(tracked_regs, 1);
|
||||
// // const v128 pindex = v128::from8p(index);
|
||||
|
||||
// // gv_eq8(p1,)
|
||||
// return state[index];
|
||||
}
|
||||
};
|
||||
|
||||
// Pattern structures
|
||||
|
@ -4932,6 +5090,13 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
};
|
||||
|
||||
struct rddec_noop_loop_t
|
||||
{
|
||||
bool active = false;
|
||||
u32 read_pc = SPU_LS_SIZE; // PC of RDCH (that encloses the loop)
|
||||
masked_reg_state_t current_state;
|
||||
};
|
||||
|
||||
// Reset tags
|
||||
reg_state_t::alloc_tag(true);
|
||||
|
||||
|
@ -4954,6 +5119,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
// RDCH/RCHCNT Loop analysis tracker
|
||||
rchcnt_loop_t rchcnt_loop{};
|
||||
|
||||
// RdDec (RDCH) Loop analysis tracker
|
||||
rddec_noop_loop_t rddec_loop{};
|
||||
|
||||
block_reg_state_iterator(u32 _pc, usz _parent_iterator_index = umax, usz _parent_target_index = 0) noexcept
|
||||
: pc(_pc)
|
||||
, parent_iterator_index(_parent_iterator_index)
|
||||
|
@ -4964,8 +5132,11 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
|
||||
std::vector<block_reg_state_iterator> reg_state_it;
|
||||
|
||||
std::map<u32, atomic16_t> atomic16_all; // RdAtomicStat location -> atomic loop optimization state
|
||||
std::map<u32, rchcnt_loop_t> rchcnt_loop_all; // RDCH/RCHCNT location -> channel read loop optimization state
|
||||
auto_typemap<spu_recompiler_base> pattern_map;
|
||||
|
||||
auto& atomic16_all = pattern_map.get<std::map<u32, atomic16_t>>(); // RdAtomicStat location -> atomic loop optimization state
|
||||
auto& rchcnt_loop_all = pattern_map.get<std::map<u32, rchcnt_loop_t>>(); // RDCH/RCHCNT location -> channel read loop optimization state
|
||||
auto& rddec_noop_loop_all = pattern_map.get<std::map<u32, rddec_noop_loop_t>>(); // RDCH/RCHCNT location -> channel read loop optimization state
|
||||
std::map<u32, bool> getllar_starts; // True for failed loops
|
||||
std::map<u32, bool> run_on_block;
|
||||
std::map<u32, bool> logged_block;
|
||||
|
@ -4973,7 +5144,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
std::array<reg_state_t, s_reg_max>* true_state_walkby = nullptr;
|
||||
|
||||
atomic16_t dummy16{};
|
||||
rchcnt_loop_t dummy_loop{};
|
||||
rchcnt_loop_t dummy_rdcnt{};
|
||||
rddec_noop_loop_t dummy_rddec{};
|
||||
|
||||
bool likely_putllc_loop = false;
|
||||
bool had_putllc_evaluation = false;
|
||||
|
@ -5087,7 +5259,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
|
||||
auto& vregs = is_form_block ? infos[bpc]->local_state : *true_state_walkby;
|
||||
const auto atomic16 = is_pattern_match ? &::at32(reg_state_it, wi).atomic16 : &dummy16;
|
||||
const auto rchcnt_loop = is_pattern_match ? &::at32(reg_state_it, wi).rchcnt_loop : &dummy_loop;
|
||||
const auto rchcnt_loop = is_pattern_match ? &::at32(reg_state_it, wi).rchcnt_loop : &dummy_rdcnt;
|
||||
const auto rddec_loop = is_pattern_match ? &::at32(reg_state_it, wi).rddec_loop : &dummy_rddec;
|
||||
|
||||
const u32 pos = wa;
|
||||
|
||||
|
@ -5152,27 +5325,29 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
};
|
||||
|
||||
const auto break_channel_pattern = [&](u32 cause, rchcnt_loop_t previous)
|
||||
const auto break_a_pattern = <typename BreakT, typename Stats>[&](u32 cause, BreakT previous, std::string_view pattern_name)
|
||||
{
|
||||
if (previous.active && rchcnt_loop_all.contains(previous.read_pc))
|
||||
auto& all_patterns = pattern_map.get<std::map<u32, BreakT>>();
|
||||
|
||||
if (previous.active && all_patterns.contains(previous.read_pc))
|
||||
{
|
||||
const bool is_first = !std::exchange(rchcnt_loop_all[previous.read_pc].failed, true);
|
||||
const bool is_first = !std::exchange(all_patterns[previous.read_pc].failed, true);
|
||||
|
||||
if (!is_first)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
g_fxo->get<rchcnt_statistics_t>().breaking_reason[cause]++;
|
||||
g_fxo->get<Stats>().breaking_reason[cause]++;
|
||||
|
||||
if (!spu_log.notice)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
std::string break_error = fmt::format("Channel pattern breakage [%x cause=%u] (read_pc=0x%x)", pos, cause, previous.read_pc);
|
||||
std::string break_error = fmt::format("%s pattern breakage [%x cause=%u] (read_pc=0x%x)", pattern_name, pos, cause, previous.read_pc);
|
||||
|
||||
const auto values = sort_breakig_reasons(g_fxo->get<rchcnt_statistics_t>().breaking_reason);
|
||||
const auto values = sort_breakig_reasons(g_fxo->get<Stats>().breaking_reason);
|
||||
|
||||
std::string tracing = "Top Breaking Reasons:";
|
||||
|
||||
|
@ -5209,10 +5384,21 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
};
|
||||
|
||||
const auto break_channel_pattern = [&](u32 cause)
|
||||
{
|
||||
break_a_pattern(cause, rchcnt_loop->discard(), "Channel");
|
||||
};
|
||||
|
||||
const auto break_rddec_pattern = [&](u32 cause)
|
||||
{
|
||||
break_a_pattern(cause, rddec_loop->discard(), "RdDec");
|
||||
};
|
||||
|
||||
const auto break_all_patterns = [&](u32 cause)
|
||||
{
|
||||
break_putllc16(cause, atomic16->discard());
|
||||
break_channel_pattern(cause, rchcnt_loop->discard());
|
||||
break_rddec_pattern(cause, rddec_loop->discard());
|
||||
};
|
||||
|
||||
const auto calculate_absolute_ls_difference = [](u32 addr1, u32 addr2)
|
||||
|
@ -5502,6 +5688,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
// Backup analyser information
|
||||
const auto atomic16_info = reg_state_it[stackframe_it].atomic16;
|
||||
const auto rchcnt_loop_info = reg_state_it[stackframe_it].rchcnt_loop;
|
||||
const auto rddec_loop_info = reg_state_it[stackframe_it].rddec_loop;
|
||||
|
||||
// Clean from the back possible because it does not affect old indices
|
||||
// Technically should always do a full cleanup at the moment
|
||||
|
@ -5531,6 +5718,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
{
|
||||
// Restore analyser information (if not an entry)
|
||||
next.atomic16 = atomic16_info;
|
||||
// Restore the RdDec tracker from its own backup (not from the channel-count tracker)
next.rddec_loop = rddec_loop_info;
|
||||
|
||||
if (previous_pc != rchcnt_loop_info.branch_pc || target_pc == rchcnt_loop_info.branch_target)
|
||||
next.rchcnt_loop = rchcnt_loop_info;
|
||||
|
@ -5549,6 +5737,14 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
pair.failed = true;
|
||||
pair.active = false;
|
||||
}
|
||||
|
||||
if (rddec_loop_info.active)
|
||||
{
|
||||
// Does not post-dominates channel read
|
||||
auto& pair = rddec_noop_loop_all[rddec_loop_info.read_pc];
|
||||
pair.failed = true;
|
||||
pair.active = false;
|
||||
}
|
||||
}
|
||||
|
||||
next.iterator_id = iterator_id_alloc++;
|
||||
|
@ -5635,18 +5831,18 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
return;
|
||||
}
|
||||
|
||||
const u32 ones = (state.known_ones | mask_ones) & ~mask_zeroes;
|
||||
const u32 ones = (state.get_known_ones() | mask_ones) & ~mask_zeroes;
|
||||
const u32 zeroes = (state.known_zeroes | mask_zeroes) & ~mask_ones;
|
||||
|
||||
if ((ones ^ zeroes) == umax)
|
||||
{
|
||||
// Special case: create a constant from full masks
|
||||
vregs[reg] = reg_state_t::from_value(ones);
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 new_value = (state.value | ones) & ~zeroes;
|
||||
const u32 bit_mask = state.bit_range & ~(ones | zeroes);
|
||||
|
||||
ensure(state.tag != umax);
|
||||
vregs[reg] = reg_state_t{vf::is_mask, 0, state.tag, ones, zeroes, state.origin};
|
||||
vregs[reg].origin = state.origin;
|
||||
vregs[reg].bit_range = bit_mask;
|
||||
// Store the value with the newly known bits applied
vregs[reg].value = new_value;
|
||||
vregs[reg].tag = state.tag;
|
||||
};
|
||||
|
||||
const auto unconst = [&](u32 reg, u32 pc)
|
||||
|
@ -5714,6 +5910,14 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
}
|
||||
|
||||
auto emplace_origin = [&](std::vector<u32>& origins, u32 origin)
|
||||
{
|
||||
if (!std::count(origins.begin(), origins.end(), pos))
|
||||
{
|
||||
origins.emplace_back(origin);
|
||||
}
|
||||
};
|
||||
|
||||
if (rchcnt_loop->active)
|
||||
{
|
||||
if (std::find(rchcnt_loop->origins.begin(), rchcnt_loop->origins.end(), pos) != rchcnt_loop->origins.end())
|
||||
|
@ -5780,6 +5984,77 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
spu_log.always()("[SPU=0%x, it=%d] %s%s [%d]", pos, reg_state_it[wi].iterator_id, dis_asm.last_opcode, consts, atomic16->active);
|
||||
}
|
||||
|
||||
// Make unknown value
|
||||
const reg_state_t* ra_state = nullptr;
|
||||
const reg_state_t* rb_state = nullptr;
|
||||
const reg_state_t* rc_state = nullptr;
|
||||
reg_state_t reg_saved_state;
|
||||
{
|
||||
u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max;
|
||||
|
||||
if (m_use_ra.test(pos / 4))
|
||||
{
|
||||
ra = op.ra;
|
||||
ra_state = &vregs[op.ra];
|
||||
}
|
||||
|
||||
if (m_use_rb.test(pos / 4))
|
||||
{
|
||||
rb = op.rb;
|
||||
rb_state = &vregs[op.rb];
|
||||
}
|
||||
|
||||
if (type & spu_itype::_quadrop && m_use_rc.test(pos / 4))
|
||||
{
|
||||
rc = op.rc;
|
||||
rc_state = &vregs[op.rc];
|
||||
}
|
||||
|
||||
if (!(type & spu_itype::zregmod))
|
||||
{
|
||||
const u32 op_rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
|
||||
|
||||
if (op_rt == ra)
|
||||
{
|
||||
reg_saved_state = *ra_state;
|
||||
ra_state = ®_saved_state;
|
||||
}
|
||||
|
||||
if (op_rt == rb)
|
||||
{
|
||||
reg_saved_state = *rb_state;
|
||||
rb_state = ®_saved_state;
|
||||
}
|
||||
|
||||
if (op_rt == rc)
|
||||
{
|
||||
reg_saved_state = *rc_state;
|
||||
rc_state = ®_saved_state;
|
||||
}
|
||||
|
||||
masked_regs_t dest{};
|
||||
|
||||
if (ra != s_reg_max)
|
||||
{
|
||||
dest = dest.join(loop->current_state.get(ra));
|
||||
}
|
||||
|
||||
if (rb != s_reg_max)
|
||||
{
|
||||
dest = dest.join(loop->current_state.get(rb));
|
||||
}
|
||||
|
||||
if (rc != s_reg_max)
|
||||
{
|
||||
rddec_loop->current_state.get(op_rt) = dest.join(loop->current_state.get(rc));
|
||||
}
|
||||
else
|
||||
{
|
||||
rddec_loop->current_state.get(op_rt) = dest;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Analyse instruction
|
||||
switch (type)
|
||||
{
|
||||
|
@ -6167,20 +6442,21 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
const bool is_read = type == spu_itype::RDCH;
|
||||
bool invalidate = true;
|
||||
|
||||
const auto it = rchcnt_loop_all.find(pos);
|
||||
const auto cntit = rchcnt_loop_all.find(pos);
|
||||
const auto decit = rchcnt_loop_all.find(pos);
|
||||
|
||||
if (it != rchcnt_loop_all.end())
|
||||
if (cntit != rchcnt_loop_all.end())
|
||||
{
|
||||
if (rchcnt_loop->failed || !rchcnt_loop->conditioned || rchcnt_loop->read_pc != pos)
|
||||
{
|
||||
// Propagate faiure
|
||||
it->second.failed = true;
|
||||
it->second.active = false;
|
||||
it->second.conditioned = false;
|
||||
cntit->second.failed = true;
|
||||
cntit->second.active = false;
|
||||
cntit->second.conditioned = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
it->second.active = false;
|
||||
cntit->second.active = false;
|
||||
}
|
||||
|
||||
rchcnt_loop->active = false;
|
||||
|
@ -6194,6 +6470,31 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
}
|
||||
|
||||
if (decit != rddec_noop_loop_all.end())
|
||||
{
|
||||
if (rddec_loop->failed || !rddec_loop->conditioned || rddec_loop->read_pc != pos)
|
||||
{
|
||||
// Propagate faiure
|
||||
decit->second.failed = true;
|
||||
decit->second.active = false;
|
||||
decit->second.conditioned = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
decit->second.active = false;
|
||||
}
|
||||
|
||||
rddec_loop->active = false;
|
||||
}
|
||||
|
||||
if (rddec_loop->active && rddec_loop->read_pc != pos)
{
	// Channel instruction at a PC other than the tracked read: give up on the RdDec pattern.
	// break_rddec_pattern() takes the cause only and discards the tracker itself.
	break_rddec_pattern(53);
}
|
||||
|
||||
switch (op.ra)
|
||||
{
|
||||
case MFC_RdAtomicStat:
|
||||
|
@ -6733,7 +7034,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
{
|
||||
hbr_loc = spu_branch_target(pos, op.roh << 7 | op.rt);
|
||||
const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra);
|
||||
hbr_tg = af & vf::is_const && !op.c ? av & 0x3fffc : -1;
|
||||
hbr_tg = af.is_const() && !op.c ? av & 0x3fffc : -1;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7040,65 +7341,62 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
default:
|
||||
{
|
||||
// Make unknown value
|
||||
if (!(type & spu_itype::zregmod))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Make unknown value
|
||||
if (!(type & spu_itype::zregmod))
|
||||
{
|
||||
const u32 op_rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
|
||||
|
||||
u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max;
|
||||
|
||||
if (m_use_ra.test(pos / 4))
|
||||
{
|
||||
const u32 op_rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
|
||||
ra = op.ra;
|
||||
}
|
||||
|
||||
u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max;
|
||||
if (m_use_rb.test(pos / 4))
|
||||
{
|
||||
rb = op.rb;
|
||||
}
|
||||
|
||||
if (m_use_ra.test(pos / 4))
|
||||
if (type & spu_itype::_quadrop && m_use_rc.test(pos / 4))
|
||||
{
|
||||
rc = op.rc;
|
||||
}
|
||||
|
||||
u32 reg_pos = SPU_LS_SIZE;
|
||||
|
||||
for (u32 reg : {ra, rb, rc})
|
||||
{
|
||||
if (reg != s_reg_max)
|
||||
{
|
||||
ra = op.ra;
|
||||
}
|
||||
|
||||
if (m_use_rb.test(pos / 4))
|
||||
{
|
||||
rb = op.rb;
|
||||
}
|
||||
|
||||
if (type & spu_itype::_quadrop && m_use_rc.test(pos / 4))
|
||||
{
|
||||
rc = op.rc;
|
||||
}
|
||||
|
||||
u32 reg_pos = SPU_LS_SIZE;
|
||||
|
||||
for (u32 reg : {ra, rb, rc})
|
||||
{
|
||||
if (reg != s_reg_max)
|
||||
if (reg_pos == SPU_LS_SIZE)
|
||||
{
|
||||
if (reg_pos == SPU_LS_SIZE)
|
||||
{
|
||||
reg = vregs[reg].origin;
|
||||
}
|
||||
else if (reg_pos != vregs[reg].origin)
|
||||
{
|
||||
const u32 block_start = reg_state_it[wi].pc;
|
||||
|
||||
// if (vregs[reg].origin >= block_start && vregs[reg].origin <= pos)
|
||||
// {
|
||||
// reg_pos = std::max<u32>(vregs[reg].origin, reg_pos);
|
||||
// }
|
||||
reg_pos = block_start;
|
||||
break;
|
||||
}
|
||||
reg = vregs[reg].origin;
|
||||
}
|
||||
}
|
||||
|
||||
unconst(op_rt, reg_pos == SPU_LS_SIZE ? pos : reg_pos);
|
||||
|
||||
if (rchcnt_loop->active)
|
||||
{
|
||||
if (std::find(rchcnt_loop->origins.begin(), rchcnt_loop->origins.end(), vregs[op_rt].origin) == rchcnt_loop->origins.end())
|
||||
else if (reg_pos != vregs[reg].origin)
|
||||
{
|
||||
rchcnt_loop->origins.push_back(vregs[op_rt].origin);
|
||||
const u32 block_start = reg_state_it[wi].pc;
|
||||
|
||||
// if (vregs[reg].origin >= block_start && vregs[reg].origin <= pos)
|
||||
// {
|
||||
// reg_pos = std::max<u32>(vregs[reg].origin, reg_pos);
|
||||
// }
|
||||
reg_pos = block_start;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
unconst(op_rt, reg_pos == SPU_LS_SIZE ? pos : reg_pos);
|
||||
|
||||
if (rchcnt_loop->active)
|
||||
{
|
||||
emplace_origin(rchcnt_loop->origins, vregs[op_rt].origin);
|
||||
}
|
||||
}
|
||||
|
||||
if (m_targets.count(pos))
|
||||
|
|
|
@ -193,9 +193,7 @@ public:
|
|||
// Value flags (TODO: only is_const is implemented)
|
||||
enum class vf : u32
|
||||
{
|
||||
is_const,
|
||||
is_mask,
|
||||
is_rel,
|
||||
is_null,
|
||||
|
||||
__bitset_enum_max
|
||||
|
@ -206,8 +204,8 @@ public:
|
|||
bs_t<vf> flag{+vf::is_null};
|
||||
u32 value{};
|
||||
u32 tag = umax;
|
||||
u32 known_ones{};
|
||||
u32 known_zeroes{};
|
||||
u32 value_range = 0;
|
||||
u32 bit_range = umax;
|
||||
u32 origin = SPU_LS_SIZE;
|
||||
bool is_instruction = false;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue