SPU: Utilize Operating System sleep in detected RCHCNT loop

This commit is contained in:
Elad Ashkenazi 2024-07-13 10:48:41 +03:00
parent f3ceebabd9
commit 77e8f9a8ab
7 changed files with 271 additions and 100 deletions

View file

@ -244,7 +244,14 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
case SPU_In_MBox_offs: case SPU_In_MBox_offs:
{ {
ch_in_mbox.push(value); if (!ch_in_mbox.push(value).op_done)
{
if (auto cpu = cpu_thread::get_current())
{
cpu->state += cpu_flag::again;
}
}
return true; return true;
} }

View file

@ -6253,10 +6253,15 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
rchcnt_loop.ch_state = vregs[op.rt]; rchcnt_loop.ch_state = vregs[op.rt];
invalidate = false; invalidate = false;
} }
else if (rchcnt_loop.active && it != rchcnt_loop_all.end()) else if (rchcnt_loop.active)
{ {
// Success // Success
it->second.active = false; rchcnt_loop.active = false;
if (it == rchcnt_loop_all.end())
{
rchcnt_loop_all.emplace(pos, rchcnt_loop);
}
} }
break; break;
@ -7167,7 +7172,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none) if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
{ {
//add_pattern(false, inst_attr::ch_lop, get_pc - result.entry_point); add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point);
spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash); spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
} }

View file

@ -3487,9 +3487,28 @@ public:
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>()); return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
} }
// Emits a call to a runtime helper that waits on an SPU channel and then yields its count.
// off: offset of the channel inside the thread object (callers here pass ::offset32(&spu_thread::...)).
// inv: when zero the helper takes the pop_wait path and the count is returned as-is;
//      when non-zero it takes the push_wait path and the count is XORed with inv
//      (the write-channel callers pass true, which complements a 0/1 count).
// Returns the IR value holding the (possibly inverted) count.
llvm::Value* wait_rchcnt(u32 off, u64 inv = 0)
{
// Runtime helper called from generated code; unary + turns the captureless lambda into a function pointer
auto wait_on_channel = [](spu_thread* _spu, spu_channel* ch, u32 is_read) -> u32
{
if (is_read)
{
// Wait only: pop=false, so no value is consumed
ch->pop_wait(*_spu, false);
}
else
{
// Wait only: push=false; the 0 value argument appears to be a placeholder in this mode
ch->push_wait(*_spu, 0, false);
}
// Report the channel count observed after the wait returned
return ch->get_count();
};
// is_read is passed as !inv; the helper result is XORed with inv as a 32-bit constant
return m_ir->CreateXor(call("wait_on_spu_channel", +wait_on_channel, m_thread, _ptr<u64>(m_thread, off), m_ir->getInt32(!inv)), m_ir->getInt32(inv));
}
void RCHCNT(spu_opcode_t op) // void RCHCNT(spu_opcode_t op) //
{ {
value_t<u32> res; value_t<u32> res{};
if (m_interp_magn) if (m_interp_magn)
{ {
@ -3532,6 +3551,50 @@ public:
} }
} }
if (m_inst_attrs[(m_pos - m_base) / 4] == inst_attr::rchcnt_loop)
{
switch (op.ra)
{
case SPU_WrOutMbox:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_mbox), true);
break;
}
case SPU_WrOutIntrMbox:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true);
break;
}
case SPU_RdSigNotify1:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr1));
break;
}
case SPU_RdSigNotify2:
{
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr2));
break;
}
case SPU_RdInMbox:
{
auto wait_inbox = [](spu_thread* _spu, spu_channel_4_t* ch) -> u32
{
return ch->pop_wait(*_spu, false), ch->get_count();
};
res.value = call("wait_spu_inbox", +wait_inbox, m_thread, spu_ptr<void*>(&spu_thread::ch_in_mbox));
break;
}
default: break;
}
if (res.value)
{
set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
return;
}
}
switch (op.ra) switch (op.ra)
{ {
case SPU_WrOutMbox: case SPU_WrOutMbox:

View file

@ -390,6 +390,7 @@ protected:
omit, omit,
putllc16, putllc16,
putllc0, putllc0,
rchcnt_loop,
}; };
std::vector<inst_attr> m_inst_attrs; std::vector<inst_attr> m_inst_attrs;

View file

@ -2235,7 +2235,7 @@ void spu_thread::push_snr(u32 number, u32 value)
const bool bitor_bit = !!((snr_config >> number) & 1); const bool bitor_bit = !!((snr_config >> number) & 1);
// Redundant, g_use_rtm is checked inside tx_start now. // Redundant, g_use_rtm is checked inside tx_start now.
if (g_use_rtm) if (g_use_rtm && false)
{ {
bool channel_notify = false; bool channel_notify = false;
bool thread_notify = false; bool thread_notify = false;
@ -2295,8 +2295,21 @@ void spu_thread::push_snr(u32 number, u32 value)
}); });
// Check corresponding SNR register settings // Check corresponding SNR register settings
if (channel->push(value, bitor_bit)) auto push_state = channel->push(value, bitor_bit);
if (push_state.old_count < push_state.count)
{
set_events(event_bit); set_events(event_bit);
}
else if (!push_state.op_done)
{
ensure(is_stopped());
if (auto cpu = cpu_thread::get_current())
{
cpu->state += cpu_flag::again;
}
}
ch_events.atomic_op([](ch_events_t& ev) ch_events.atomic_op([](ch_events_t& ev)
{ {
@ -6846,8 +6859,13 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
return false; return false;
} }
data = bit_wait; data = (pop ? bit_occupy : 0) | bit_wait;
jostling_value.release(bit_wait);
if (pop)
{
jostling_value.release(bit_occupy);
}
return true; return true;
}).first; }).first;
@ -6862,29 +6880,39 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
if (!(data & bit_wait)) if (!(data & bit_wait))
{ {
return static_cast<u32>(jostling_value); return static_cast<u32>(pop ? jostling_value.exchange(0) : +data);
} }
} }
const u32 wait_on_val = static_cast<u32>(((pop ? bit_occupy : 0) | bit_wait) >> 32);
while (true) while (true)
{ {
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32{bit_wait >> 32}); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], wait_on_val);
old = data; old = data;
if (!(old & bit_wait)) if (!(old & bit_wait))
{ {
return static_cast<u32>(jostling_value); return static_cast<u32>(pop ? jostling_value.exchange(0) : +data);
} }
if (spu.is_stopped()) if (spu.is_stopped())
{ {
// Abort waiting and test if a value has been received // Abort waiting and test if a value has been received
if (u64 v = jostling_value.exchange(0); !(v & bit_wait)) if (pop)
{
if (u64 v = jostling_value.exchange(0); !(v & bit_occupy))
{ {
return static_cast<u32>(v); return static_cast<u32>(v);
} }
ensure(data.bit_test_reset(off_wait)); ensure(data.fetch_and(~(bit_wait | bit_occupy)) & bit_wait);
}
else
{
data.bit_test_reset(off_wait);
}
return -1; return -1;
} }
} }
@ -6898,8 +6926,8 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
{ {
if (data & bit_count) [[unlikely]] if (data & bit_count) [[unlikely]]
{ {
jostling_value.release(push ? value : static_cast<u32>(data)); jostling_value.release(push ? (bit_occupy | value) : static_cast<u32>(data));
data |= bit_wait; data |= (push ? bit_occupy : 0) | bit_wait;
} }
else if (push) else if (push)
{ {
@ -6919,11 +6947,6 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
{ {
if (!(state & bit_wait)) if (!(state & bit_wait))
{ {
if (!push)
{
data &= ~bit_count;
}
return true; return true;
} }
@ -6935,18 +6958,12 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
{ {
if (!(state & bit_wait)) if (!(state & bit_wait))
{ {
if (!push)
{
data &= ~bit_count;
}
return true; return true;
} }
if (spu.is_stopped()) if (spu.is_stopped())
{ {
data &= ~bit_wait; return !data.bit_test_reset(off_wait);
return false;
} }
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32)); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32));
@ -6954,12 +6971,17 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
} }
} }
std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu) std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu, bool pop_value)
{ {
auto old = values.fetch_op([&](sync_var_t& data) auto old = values.fetch_op([&](sync_var_t& data)
{ {
if (data.count != 0) if (data.count != 0)
{ {
if (!pop_value)
{
return;
}
data.waiting = 0; data.waiting = 0;
data.count--; data.count--;
@ -6969,8 +6991,8 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
} }
else else
{ {
data.waiting = 1; data.waiting = (pop_value ? bit_occupy : 0) | bit_wait;
jostling_value.release(bit_wait); jostling_value.release(pop_value ? jostling_flag : 0);
} }
}); });
@ -6979,7 +7001,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
return {old.count, old.value0}; return {old.count, old.value0};
} }
old.waiting = 1; old.waiting = (pop_value ? bit_occupy : 0) | bit_wait;
for (int i = 0; i < 10; i++) for (int i = 0; i < 10; i++)
{ {
@ -6987,7 +7009,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
if (!atomic_storage<u8>::load(values.raw().waiting)) if (!atomic_storage<u8>::load(values.raw().waiting))
{ {
return {1, static_cast<u32>(jostling_value)}; return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
} }
} }
@ -6996,26 +7018,91 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old)))); thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old))));
old = values; old = values;
if (!old.waiting) if (~old.waiting & bit_wait)
{ {
// Count of 1 because a value has been inserted and popped in the same step. // Count of 1 because a value has been inserted and popped in the same step.
return {1, static_cast<u32>(jostling_value)}; return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
} }
if (spu.is_stopped()) if (spu.is_stopped())
{
if (pop_value)
{ {
// Abort waiting and test if a value has been received // Abort waiting and test if a value has been received
if (u64 v = jostling_value.exchange(0); !(v & bit_wait)) if (u64 v = jostling_value.exchange(0); !(v & jostling_flag))
{ {
return {1, static_cast<u32>(v)}; return {1, static_cast<u32>(v)};
} }
}
if (~atomic_storage<u8>::exchange(values.raw().waiting, 0) & bit_wait)
{
// Count of 1 because a value has been inserted and popped in the same step.
return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
}
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
return {}; return {};
} }
} }
} }
// Pushes a value into the 4-entry channel.
// value: the value to insert.
// postpone_notify: when true, a waiter is not woken here; the returned `notify` field tells
//                  the caller whether the wait bit was set so it can call notify() itself.
// Returns { old count, new count (capped at 4), wait bit was set, op_done }.
// op_done is false when the wait bit had already been cleared by someone else (see below).
spu_channel_op_state spu_channel_4_t::push(u32 value, bool postpone_notify)
{
while (true)
{
// value3 lives outside the atomic sync_var_t, so publish it before the atomic update
value3.release(value);
const auto [old, pushed_to_data] = values.fetch_op([&](sync_var_t& data)
{
// A waiter that wants the value directly has marked the channel occupied: do not touch the queue
if (data.waiting & bit_occupy)
{
return false;
}
// Store into the slot selected by the pre-increment count
switch (data.count++)
{
case 0: data.value0 = value; break;
case 1: data.value1 = value; break;
case 2: data.value2 = value; break;
default:
{
// Fourth slot (or overflow): clamp count to 4, the value itself was already written to value3
data.count = 4;
data.value3_inval++; // Ensure the SPU reads the most recent value3 write in try_pop by re-loading
break;
}
}
return true;
});
if (!pushed_to_data)
{
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
if (!jostling_value.compare_and_swap_test(jostling_flag, value))
{
// Other thread has inserted a value through jostling_value, retry
continue;
}
}
if (old.waiting & bit_wait)
{
// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread pushing to jostling_value)
if (~atomic_storage<u8>::exchange(values.raw().waiting, 0) & bit_wait)
{
// Could be fatal or at emulation stopping, to be checked by the caller
return { old.count, old.count, false, false };
}
// Wake the waiter now unless the caller asked to do it later
if (!postpone_notify)
{
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
}
}
// Success: new count is old count + 1, capped at 4
return { old.count, std::min<u8>(static_cast<u8>(old.count + 1), 4), !!(old.waiting & bit_wait), true };
}
}
template <> template <>
void fmt_class_string<spu_channel>::format(std::string& out, u64 arg) void fmt_class_string<spu_channel>::format(std::string& out, u64 arg)
{ {

View file

@ -176,6 +176,14 @@ enum : u32
SPU_FAKE_BASE_ADDR = 0xE8000000, SPU_FAKE_BASE_ADDR = 0xE8000000,
}; };
// Result of a channel push (returned by spu_channel::push and spu_channel_4_t::push).
// Callers brace-initialize it positionally, so the field order must not change.
struct spu_channel_op_state
{
// Value reported for the channel state before the push.
// NOTE(review): spu_channel::push fills this with `(old & bit_count) == 0`, i.e. 1 when the
// channel was empty - confirm this matches the `old_count < count` check in push_snr.
u8 old_count;
// Count reported for the channel after the push
u8 count;
// True if the wait bit was set when the push happened (a waiter may need notify())
bool notify;
// False if the push bailed out after failing to clear the wait bit; callers then raise cpu_flag::again
bool op_done;
};
struct alignas(16) spu_channel struct alignas(16) spu_channel
{ {
// Low 32 bits contain value // Low 32 bits contain value
@ -186,8 +194,10 @@ struct alignas(16) spu_channel
public: public:
static constexpr u32 off_wait = 32; static constexpr u32 off_wait = 32;
static constexpr u32 off_occupy = 32;
static constexpr u32 off_count = 63; static constexpr u32 off_count = 63;
static constexpr u64 bit_wait = 1ull << off_wait; static constexpr u64 bit_wait = 1ull << off_wait;
static constexpr u64 bit_occupy = 1ull << off_occupy;
static constexpr u64 bit_count = 1ull << off_count; static constexpr u64 bit_count = 1ull << off_count;
// Returns true on success // Returns true on success
@ -207,20 +217,21 @@ public:
// Push unconditionally, may require notification // Push unconditionally, may require notification
// Performing bitwise OR with previous value if specified, otherwise overwriting it // Performing bitwise OR with previous value if specified, otherwise overwriting it
bool push(u32 value, bool to_or = false) // Returns old count and new count
spu_channel_op_state push(u32 value, bool to_or = false, bool postpone_notify = false)
{ {
while (true) while (true)
{ {
const auto [old, pushed_to_data] = data.fetch_op([&](u64& data) const auto [old, pushed_to_data] = data.fetch_op([&](u64& data)
{ {
if (data == bit_wait) if (data & bit_occupy)
{ {
return false; return false;
} }
if (to_or) if (to_or)
{ {
data |= bit_count | value; data = bit_count | (static_cast<u32>(data) | value);
} }
else else
{ {
@ -233,26 +244,42 @@ public:
if (!pushed_to_data) if (!pushed_to_data)
{ {
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data // Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
if (!jostling_value.compare_and_swap_test(bit_wait, value)) if (!jostling_value.compare_and_swap_test(bit_occupy, value))
{ {
// Other thread has inserted a value through jostling_value, retry // Other thread has inserted a value through jostling_value, retry
continue; continue;
} }
}
if (old & bit_wait)
{
// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread that pushed to jostling_value) // Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread that pushed to jostling_value)
ensure(this->data.bit_test_reset(off_wait)); if (!this->data.bit_test_reset(off_wait))
{
// Could be fatal or at emulation stopping, to be checked by the caller
return { (old & bit_count) == 0, 0, false, false };
}
if (!postpone_notify)
{
utils::bless<atomic_t<u32>>(&data)[1].notify_one(); utils::bless<atomic_t<u32>>(&data)[1].notify_one();
} }
}
// Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs // Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs
return !pushed_to_data || (old & bit_count) == 0; return { (old & bit_count) == 0, 1, (old & bit_wait) != 0, true };
} }
} }
// Wakes one thread waiting on the second 32-bit word of `data`, the same word the
// pop_wait/push_wait loops sleep on. Intended for callers that pushed with postpone_notify.
void notify()
{
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
}
// Returns true on success // Returns true on success
bool try_pop(u32& out) bool try_pop(u32& out)
{ {
return data.fetch_op([&](u64& data) return data.fetch_op([&out](u64& data)
{ {
if (data & bit_count) [[likely]] if (data & bit_count) [[likely]]
{ {
@ -284,7 +311,7 @@ public:
u32 pop() u32 pop()
{ {
// Value is not cleared and may be read again // Value is not cleared and may be read again
constexpr u64 mask = bit_count | bit_wait; constexpr u64 mask = bit_count | bit_occupy;
const u64 old = data.fetch_op([&](u64& data) const u64 old = data.fetch_op([&](u64& data)
{ {
@ -295,10 +322,10 @@ public:
return; return;
} }
data &= ~mask; data &= ~(mask | bit_wait);
}); });
if ((old & mask) == mask) if (old & bit_wait)
{ {
utils::bless<atomic_t<u32>>(&data)[1].notify_one(); utils::bless<atomic_t<u32>>(&data)[1].notify_one();
} }
@ -324,7 +351,7 @@ public:
u32 get_count() const u32 get_count() const
{ {
return static_cast<u32>(data >> off_count); return (data & bit_count) ? 1 : 0;
} }
}; };
@ -344,61 +371,28 @@ struct spu_channel_4_t
atomic_t<u64> jostling_value; atomic_t<u64> jostling_value;
atomic_t<u32> value3; atomic_t<u32> value3;
static constexpr u32 off_wait = 32; static constexpr u32 off_wait = 0;
static constexpr u32 off_occupy = 7;
static constexpr u64 bit_wait = 1ull << off_wait; static constexpr u64 bit_wait = 1ull << off_wait;
static constexpr u64 bit_occupy = 1ull << off_occupy;
static constexpr u64 jostling_flag = 1ull << 63;
void clear() void clear()
{ {
values.release({}); values.release({});
jostling_value.release(0);
value3.release(0);
} }
// push unconditionally (overwriting latest value), returns true if needs signaling // push unconditionally (overwriting latest value), returns true if needs signaling
void push(u32 value) // returning if could be aborted (operation failed unexpectedly)
{ spu_channel_op_state push(u32 value, bool postpone_notify = false);
while (true)
{
value3.release(value);
const auto [old, pushed_to_data] = values.fetch_op([&](sync_var_t& data)
{
if (data.waiting)
{
return false;
}
switch (data.count++) void notify()
{ {
case 0: data.value0 = value; break;
case 1: data.value1 = value; break;
case 2: data.value2 = value; break;
default:
{
data.count = 4;
data.value3_inval++; // Ensure the SPU reads the most recent value3 write in try_pop by re-loading
break;
}
}
return true;
});
if (!pushed_to_data)
{
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
if (!jostling_value.compare_and_swap_test(bit_wait, value))
{
// Other thread has inserted a value through jostling_value, retry
continue;
}
// Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushing to jostling_value)
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
utils::bless<atomic_t<u32>>(&values)[0].notify_one(); utils::bless<atomic_t<u32>>(&values)[0].notify_one();
} }
return;
}
}
// returns non-zero value on success: queue size before removal // returns non-zero value on success: queue size before removal
uint try_pop(u32& out) uint try_pop(u32& out)
{ {
@ -422,7 +416,7 @@ struct spu_channel_4_t
} }
// Returns [previous count, value] (if aborted 0 count is returned) // Returns [previous count, value] (if aborted 0 count is returned)
std::pair<u32, u32> pop_wait(cpu_thread& spu); std::pair<u32, u32> pop_wait(cpu_thread& spu, bool pop_value = true);
// returns current queue size without modification // returns current queue size without modification
uint try_read(u32 (&out)[4]) const uint try_read(u32 (&out)[4]) const
@ -443,7 +437,7 @@ struct spu_channel_4_t
u32 get_count() const u32 get_count() const
{ {
return std::as_const(values).raw().count; return atomic_storage<u8>::load(std::as_const(values).raw().count);
} }
void set_values(u32 count, u32 value0, u32 value1 = 0, u32 value2 = 0, u32 value3 = 0) void set_values(u32 count, u32 value0, u32 value1 = 0, u32 value2 = 0, u32 value3 = 0)

View file

@ -1749,9 +1749,23 @@ error_code sys_spu_thread_write_spu_mb(ppu_thread& ppu, u32 id, u32 value)
return CELL_ESRCH; return CELL_ESRCH;
} }
spu_channel_op_state state{};
{
std::lock_guard lock(group->mutex); std::lock_guard lock(group->mutex);
thread->ch_in_mbox.push(value); state = thread->ch_in_mbox.push(value, true);
}
if (!state.op_done)
{
ppu.state += cpu_flag::again;
return {};
}
if (state.notify)
{
thread->ch_in_mbox.notify();
}
return CELL_OK; return CELL_OK;
} }