mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-03 13:31:27 +12:00
SPU: Utilize Operating System sleep in detected RCHCNT loop
This commit is contained in:
parent
f3ceebabd9
commit
77e8f9a8ab
7 changed files with 271 additions and 100 deletions
|
@ -244,7 +244,14 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
|
||||||
|
|
||||||
case SPU_In_MBox_offs:
|
case SPU_In_MBox_offs:
|
||||||
{
|
{
|
||||||
ch_in_mbox.push(value);
|
if (!ch_in_mbox.push(value).op_done)
|
||||||
|
{
|
||||||
|
if (auto cpu = cpu_thread::get_current())
|
||||||
|
{
|
||||||
|
cpu->state += cpu_flag::again;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6253,10 +6253,15 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||||
rchcnt_loop.ch_state = vregs[op.rt];
|
rchcnt_loop.ch_state = vregs[op.rt];
|
||||||
invalidate = false;
|
invalidate = false;
|
||||||
}
|
}
|
||||||
else if (rchcnt_loop.active && it != rchcnt_loop_all.end())
|
else if (rchcnt_loop.active)
|
||||||
{
|
{
|
||||||
// Success
|
// Success
|
||||||
it->second.active = false;
|
rchcnt_loop.active = false;
|
||||||
|
|
||||||
|
if (it == rchcnt_loop_all.end())
|
||||||
|
{
|
||||||
|
rchcnt_loop_all.emplace(pos, rchcnt_loop);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -7167,7 +7172,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||||
|
|
||||||
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
|
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
|
||||||
{
|
{
|
||||||
//add_pattern(false, inst_attr::ch_lop, get_pc - result.entry_point);
|
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point);
|
||||||
|
|
||||||
spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
|
spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3487,9 +3487,28 @@ public:
|
||||||
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
|
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llvm::Value* wait_rchcnt(u32 off, u64 inv = 0)
|
||||||
|
{
|
||||||
|
auto wait_on_channel = [](spu_thread* _spu, spu_channel* ch, u32 is_read) -> u32
|
||||||
|
{
|
||||||
|
if (is_read)
|
||||||
|
{
|
||||||
|
ch->pop_wait(*_spu, false);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ch->push_wait(*_spu, 0, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ch->get_count();
|
||||||
|
};
|
||||||
|
|
||||||
|
return m_ir->CreateXor(call("wait_on_spu_channel", +wait_on_channel, m_thread, _ptr<u64>(m_thread, off), m_ir->getInt32(!inv)), m_ir->getInt32(inv));
|
||||||
|
}
|
||||||
|
|
||||||
void RCHCNT(spu_opcode_t op) //
|
void RCHCNT(spu_opcode_t op) //
|
||||||
{
|
{
|
||||||
value_t<u32> res;
|
value_t<u32> res{};
|
||||||
|
|
||||||
if (m_interp_magn)
|
if (m_interp_magn)
|
||||||
{
|
{
|
||||||
|
@ -3532,6 +3551,50 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_inst_attrs[(m_pos - m_base) / 4] == inst_attr::rchcnt_loop)
|
||||||
|
{
|
||||||
|
switch (op.ra)
|
||||||
|
{
|
||||||
|
case SPU_WrOutMbox:
|
||||||
|
{
|
||||||
|
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_mbox), true);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SPU_WrOutIntrMbox:
|
||||||
|
{
|
||||||
|
res.value = wait_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SPU_RdSigNotify1:
|
||||||
|
{
|
||||||
|
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SPU_RdSigNotify2:
|
||||||
|
{
|
||||||
|
res.value = wait_rchcnt(::offset32(&spu_thread::ch_snr2));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SPU_RdInMbox:
|
||||||
|
{
|
||||||
|
auto wait_inbox = [](spu_thread* _spu, spu_channel_4_t* ch) -> u32
|
||||||
|
{
|
||||||
|
return ch->pop_wait(*_spu, false), ch->get_count();
|
||||||
|
};
|
||||||
|
|
||||||
|
res.value = call("wait_spu_inbox", +wait_inbox, m_thread, spu_ptr<void*>(&spu_thread::ch_in_mbox));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res.value)
|
||||||
|
{
|
||||||
|
set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch (op.ra)
|
switch (op.ra)
|
||||||
{
|
{
|
||||||
case SPU_WrOutMbox:
|
case SPU_WrOutMbox:
|
||||||
|
|
|
@ -390,6 +390,7 @@ protected:
|
||||||
omit,
|
omit,
|
||||||
putllc16,
|
putllc16,
|
||||||
putllc0,
|
putllc0,
|
||||||
|
rchcnt_loop,
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<inst_attr> m_inst_attrs;
|
std::vector<inst_attr> m_inst_attrs;
|
||||||
|
|
|
@ -2235,7 +2235,7 @@ void spu_thread::push_snr(u32 number, u32 value)
|
||||||
const bool bitor_bit = !!((snr_config >> number) & 1);
|
const bool bitor_bit = !!((snr_config >> number) & 1);
|
||||||
|
|
||||||
// Redundant, g_use_rtm is checked inside tx_start now.
|
// Redundant, g_use_rtm is checked inside tx_start now.
|
||||||
if (g_use_rtm)
|
if (g_use_rtm && false)
|
||||||
{
|
{
|
||||||
bool channel_notify = false;
|
bool channel_notify = false;
|
||||||
bool thread_notify = false;
|
bool thread_notify = false;
|
||||||
|
@ -2295,8 +2295,21 @@ void spu_thread::push_snr(u32 number, u32 value)
|
||||||
});
|
});
|
||||||
|
|
||||||
// Check corresponding SNR register settings
|
// Check corresponding SNR register settings
|
||||||
if (channel->push(value, bitor_bit))
|
auto push_state = channel->push(value, bitor_bit);
|
||||||
|
|
||||||
|
if (push_state.old_count < push_state.count)
|
||||||
|
{
|
||||||
set_events(event_bit);
|
set_events(event_bit);
|
||||||
|
}
|
||||||
|
else if (!push_state.op_done)
|
||||||
|
{
|
||||||
|
ensure(is_stopped());
|
||||||
|
|
||||||
|
if (auto cpu = cpu_thread::get_current())
|
||||||
|
{
|
||||||
|
cpu->state += cpu_flag::again;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ch_events.atomic_op([](ch_events_t& ev)
|
ch_events.atomic_op([](ch_events_t& ev)
|
||||||
{
|
{
|
||||||
|
@ -6846,8 +6859,13 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
data = bit_wait;
|
data = (pop ? bit_occupy : 0) | bit_wait;
|
||||||
jostling_value.release(bit_wait);
|
|
||||||
|
if (pop)
|
||||||
|
{
|
||||||
|
jostling_value.release(bit_occupy);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}).first;
|
}).first;
|
||||||
|
|
||||||
|
@ -6862,29 +6880,39 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
|
||||||
|
|
||||||
if (!(data & bit_wait))
|
if (!(data & bit_wait))
|
||||||
{
|
{
|
||||||
return static_cast<u32>(jostling_value);
|
return static_cast<u32>(pop ? jostling_value.exchange(0) : +data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u32 wait_on_val = static_cast<u32>(((pop ? bit_occupy : 0) | bit_wait) >> 32);
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32{bit_wait >> 32});
|
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], wait_on_val);
|
||||||
old = data;
|
old = data;
|
||||||
|
|
||||||
if (!(old & bit_wait))
|
if (!(old & bit_wait))
|
||||||
{
|
{
|
||||||
return static_cast<u32>(jostling_value);
|
return static_cast<u32>(pop ? jostling_value.exchange(0) : +data);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (spu.is_stopped())
|
if (spu.is_stopped())
|
||||||
{
|
{
|
||||||
// Abort waiting and test if a value has been received
|
// Abort waiting and test if a value has been received
|
||||||
if (u64 v = jostling_value.exchange(0); !(v & bit_wait))
|
if (pop)
|
||||||
|
{
|
||||||
|
if (u64 v = jostling_value.exchange(0); !(v & bit_occupy))
|
||||||
{
|
{
|
||||||
return static_cast<u32>(v);
|
return static_cast<u32>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure(data.bit_test_reset(off_wait));
|
ensure(data.fetch_and(~(bit_wait | bit_occupy)) & bit_wait);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
data.bit_test_reset(off_wait);
|
||||||
|
}
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6898,8 +6926,8 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
|
||||||
{
|
{
|
||||||
if (data & bit_count) [[unlikely]]
|
if (data & bit_count) [[unlikely]]
|
||||||
{
|
{
|
||||||
jostling_value.release(push ? value : static_cast<u32>(data));
|
jostling_value.release(push ? (bit_occupy | value) : static_cast<u32>(data));
|
||||||
data |= bit_wait;
|
data |= (push ? bit_occupy : 0) | bit_wait;
|
||||||
}
|
}
|
||||||
else if (push)
|
else if (push)
|
||||||
{
|
{
|
||||||
|
@ -6919,11 +6947,6 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
|
||||||
{
|
{
|
||||||
if (!(state & bit_wait))
|
if (!(state & bit_wait))
|
||||||
{
|
{
|
||||||
if (!push)
|
|
||||||
{
|
|
||||||
data &= ~bit_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6935,18 +6958,12 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
|
||||||
{
|
{
|
||||||
if (!(state & bit_wait))
|
if (!(state & bit_wait))
|
||||||
{
|
{
|
||||||
if (!push)
|
|
||||||
{
|
|
||||||
data &= ~bit_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (spu.is_stopped())
|
if (spu.is_stopped())
|
||||||
{
|
{
|
||||||
data &= ~bit_wait;
|
return !data.bit_test_reset(off_wait);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32));
|
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32));
|
||||||
|
@ -6954,12 +6971,17 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
|
std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu, bool pop_value)
|
||||||
{
|
{
|
||||||
auto old = values.fetch_op([&](sync_var_t& data)
|
auto old = values.fetch_op([&](sync_var_t& data)
|
||||||
{
|
{
|
||||||
if (data.count != 0)
|
if (data.count != 0)
|
||||||
{
|
{
|
||||||
|
if (!pop_value)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
data.waiting = 0;
|
data.waiting = 0;
|
||||||
data.count--;
|
data.count--;
|
||||||
|
|
||||||
|
@ -6969,8 +6991,8 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
data.waiting = 1;
|
data.waiting = (pop_value ? bit_occupy : 0) | bit_wait;
|
||||||
jostling_value.release(bit_wait);
|
jostling_value.release(pop_value ? jostling_flag : 0);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -6979,7 +7001,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
|
||||||
return {old.count, old.value0};
|
return {old.count, old.value0};
|
||||||
}
|
}
|
||||||
|
|
||||||
old.waiting = 1;
|
old.waiting = (pop_value ? bit_occupy : 0) | bit_wait;
|
||||||
|
|
||||||
for (int i = 0; i < 10; i++)
|
for (int i = 0; i < 10; i++)
|
||||||
{
|
{
|
||||||
|
@ -6987,7 +7009,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
|
||||||
|
|
||||||
if (!atomic_storage<u8>::load(values.raw().waiting))
|
if (!atomic_storage<u8>::load(values.raw().waiting))
|
||||||
{
|
{
|
||||||
return {1, static_cast<u32>(jostling_value)};
|
return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6996,26 +7018,91 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
|
||||||
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old))));
|
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old))));
|
||||||
old = values;
|
old = values;
|
||||||
|
|
||||||
if (!old.waiting)
|
if (~old.waiting & bit_wait)
|
||||||
{
|
{
|
||||||
// Count of 1 because a value has been inserted and popped in the same step.
|
// Count of 1 because a value has been inserted and popped in the same step.
|
||||||
return {1, static_cast<u32>(jostling_value)};
|
return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (spu.is_stopped())
|
if (spu.is_stopped())
|
||||||
|
{
|
||||||
|
if (pop_value)
|
||||||
{
|
{
|
||||||
// Abort waiting and test if a value has been received
|
// Abort waiting and test if a value has been received
|
||||||
if (u64 v = jostling_value.exchange(0); !(v & bit_wait))
|
if (u64 v = jostling_value.exchange(0); !(v & jostling_flag))
|
||||||
{
|
{
|
||||||
return {1, static_cast<u32>(v)};
|
return {1, static_cast<u32>(v)};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (~atomic_storage<u8>::exchange(values.raw().waiting, 0) & bit_wait)
|
||||||
|
{
|
||||||
|
// Count of 1 because a value has been inserted and popped in the same step.
|
||||||
|
return {1, static_cast<u32>(pop_value ? jostling_value.exchange(0) : 0)};
|
||||||
|
}
|
||||||
|
|
||||||
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spu_channel_op_state spu_channel_4_t::push(u32 value, bool postpone_notify)
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
value3.release(value);
|
||||||
|
const auto [old, pushed_to_data] = values.fetch_op([&](sync_var_t& data)
|
||||||
|
{
|
||||||
|
if (data.waiting & bit_occupy)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (data.count++)
|
||||||
|
{
|
||||||
|
case 0: data.value0 = value; break;
|
||||||
|
case 1: data.value1 = value; break;
|
||||||
|
case 2: data.value2 = value; break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
data.count = 4;
|
||||||
|
data.value3_inval++; // Ensure the SPU reads the most recent value3 write in try_pop by re-loading
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!pushed_to_data)
|
||||||
|
{
|
||||||
|
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
|
||||||
|
if (!jostling_value.compare_and_swap_test(jostling_flag, value))
|
||||||
|
{
|
||||||
|
// Other thread has inserted a value through jostling_value, retry
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (old.waiting & bit_wait)
|
||||||
|
{
|
||||||
|
// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread pushing to jostling_value)
|
||||||
|
if (~atomic_storage<u8>::exchange(values.raw().waiting, 0) & bit_wait)
|
||||||
|
{
|
||||||
|
// Could be fatal or at emulation stopping, to be checked by the caller
|
||||||
|
return { old.count, old.count, false, false };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!postpone_notify)
|
||||||
|
{
|
||||||
|
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { old.count, std::min<u8>(static_cast<u8>(old.count + 1), 4), !!(old.waiting & bit_wait), true };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void fmt_class_string<spu_channel>::format(std::string& out, u64 arg)
|
void fmt_class_string<spu_channel>::format(std::string& out, u64 arg)
|
||||||
{
|
{
|
||||||
|
|
|
@ -176,6 +176,14 @@ enum : u32
|
||||||
SPU_FAKE_BASE_ADDR = 0xE8000000,
|
SPU_FAKE_BASE_ADDR = 0xE8000000,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct spu_channel_op_state
|
||||||
|
{
|
||||||
|
u8 old_count;
|
||||||
|
u8 count;
|
||||||
|
bool notify;
|
||||||
|
bool op_done;
|
||||||
|
};
|
||||||
|
|
||||||
struct alignas(16) spu_channel
|
struct alignas(16) spu_channel
|
||||||
{
|
{
|
||||||
// Low 32 bits contain value
|
// Low 32 bits contain value
|
||||||
|
@ -186,8 +194,10 @@ struct alignas(16) spu_channel
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static constexpr u32 off_wait = 32;
|
static constexpr u32 off_wait = 32;
|
||||||
|
static constexpr u32 off_occupy = 32;
|
||||||
static constexpr u32 off_count = 63;
|
static constexpr u32 off_count = 63;
|
||||||
static constexpr u64 bit_wait = 1ull << off_wait;
|
static constexpr u64 bit_wait = 1ull << off_wait;
|
||||||
|
static constexpr u64 bit_occupy = 1ull << off_occupy;
|
||||||
static constexpr u64 bit_count = 1ull << off_count;
|
static constexpr u64 bit_count = 1ull << off_count;
|
||||||
|
|
||||||
// Returns true on success
|
// Returns true on success
|
||||||
|
@ -207,20 +217,21 @@ public:
|
||||||
|
|
||||||
// Push unconditionally, may require notification
|
// Push unconditionally, may require notification
|
||||||
// Performing bitwise OR with previous value if specified, otherwise overwriting it
|
// Performing bitwise OR with previous value if specified, otherwise overwriting it
|
||||||
bool push(u32 value, bool to_or = false)
|
// Returns old count and new count
|
||||||
|
spu_channel_op_state push(u32 value, bool to_or = false, bool postpone_notify = false)
|
||||||
{
|
{
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
const auto [old, pushed_to_data] = data.fetch_op([&](u64& data)
|
const auto [old, pushed_to_data] = data.fetch_op([&](u64& data)
|
||||||
{
|
{
|
||||||
if (data == bit_wait)
|
if (data & bit_occupy)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (to_or)
|
if (to_or)
|
||||||
{
|
{
|
||||||
data |= bit_count | value;
|
data = bit_count | (static_cast<u32>(data) | value);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -233,26 +244,42 @@ public:
|
||||||
if (!pushed_to_data)
|
if (!pushed_to_data)
|
||||||
{
|
{
|
||||||
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
|
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
|
||||||
if (!jostling_value.compare_and_swap_test(bit_wait, value))
|
if (!jostling_value.compare_and_swap_test(bit_occupy, value))
|
||||||
{
|
{
|
||||||
// Other thread has inserted a value through jostling_value, retry
|
// Other thread has inserted a value through jostling_value, retry
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (old & bit_wait)
|
||||||
|
{
|
||||||
// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread that pushed to jostling_value)
|
// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread that pushed to jostling_value)
|
||||||
ensure(this->data.bit_test_reset(off_wait));
|
if (!this->data.bit_test_reset(off_wait))
|
||||||
|
{
|
||||||
|
// Could be fatal or at emulation stopping, to be checked by the caller
|
||||||
|
return { (old & bit_count) == 0, 0, false, false };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!postpone_notify)
|
||||||
|
{
|
||||||
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
|
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs
|
// Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs
|
||||||
return !pushed_to_data || (old & bit_count) == 0;
|
return { (old & bit_count) == 0, 1, (old & bit_wait) != 0, true };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void notify()
|
||||||
|
{
|
||||||
|
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
// Returns true on success
|
// Returns true on success
|
||||||
bool try_pop(u32& out)
|
bool try_pop(u32& out)
|
||||||
{
|
{
|
||||||
return data.fetch_op([&](u64& data)
|
return data.fetch_op([&out](u64& data)
|
||||||
{
|
{
|
||||||
if (data & bit_count) [[likely]]
|
if (data & bit_count) [[likely]]
|
||||||
{
|
{
|
||||||
|
@ -284,7 +311,7 @@ public:
|
||||||
u32 pop()
|
u32 pop()
|
||||||
{
|
{
|
||||||
// Value is not cleared and may be read again
|
// Value is not cleared and may be read again
|
||||||
constexpr u64 mask = bit_count | bit_wait;
|
constexpr u64 mask = bit_count | bit_occupy;
|
||||||
|
|
||||||
const u64 old = data.fetch_op([&](u64& data)
|
const u64 old = data.fetch_op([&](u64& data)
|
||||||
{
|
{
|
||||||
|
@ -295,10 +322,10 @@ public:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
data &= ~mask;
|
data &= ~(mask | bit_wait);
|
||||||
});
|
});
|
||||||
|
|
||||||
if ((old & mask) == mask)
|
if (old & bit_wait)
|
||||||
{
|
{
|
||||||
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
|
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
|
||||||
}
|
}
|
||||||
|
@ -324,7 +351,7 @@ public:
|
||||||
|
|
||||||
u32 get_count() const
|
u32 get_count() const
|
||||||
{
|
{
|
||||||
return static_cast<u32>(data >> off_count);
|
return (data & bit_count) ? 1 : 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -344,61 +371,28 @@ struct spu_channel_4_t
|
||||||
atomic_t<u64> jostling_value;
|
atomic_t<u64> jostling_value;
|
||||||
atomic_t<u32> value3;
|
atomic_t<u32> value3;
|
||||||
|
|
||||||
static constexpr u32 off_wait = 32;
|
static constexpr u32 off_wait = 0;
|
||||||
|
static constexpr u32 off_occupy = 7;
|
||||||
static constexpr u64 bit_wait = 1ull << off_wait;
|
static constexpr u64 bit_wait = 1ull << off_wait;
|
||||||
|
static constexpr u64 bit_occupy = 1ull << off_occupy;
|
||||||
|
static constexpr u64 jostling_flag = 1ull << 63;
|
||||||
|
|
||||||
void clear()
|
void clear()
|
||||||
{
|
{
|
||||||
values.release({});
|
values.release({});
|
||||||
|
jostling_value.release(0);
|
||||||
|
value3.release(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// push unconditionally (overwriting latest value), returns true if needs signaling
|
// push unconditionally (overwriting latest value), returns true if needs signaling
|
||||||
void push(u32 value)
|
// the returned state reports whether the operation completed or was aborted (operation failed unexpectedly)
|
||||||
{
|
spu_channel_op_state push(u32 value, bool postpone_notify = false);
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
value3.release(value);
|
|
||||||
const auto [old, pushed_to_data] = values.fetch_op([&](sync_var_t& data)
|
|
||||||
{
|
|
||||||
if (data.waiting)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (data.count++)
|
void notify()
|
||||||
{
|
{
|
||||||
case 0: data.value0 = value; break;
|
|
||||||
case 1: data.value1 = value; break;
|
|
||||||
case 2: data.value2 = value; break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
data.count = 4;
|
|
||||||
data.value3_inval++; // Ensure the SPU reads the most recent value3 write in try_pop by re-loading
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!pushed_to_data)
|
|
||||||
{
|
|
||||||
// Insert the pending value in special storage for waiting SPUs, leave no time in which the channel has data
|
|
||||||
if (!jostling_value.compare_and_swap_test(bit_wait, value))
|
|
||||||
{
|
|
||||||
// Other thread has inserted a value through jostling_value, retry
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread pushing to jostling_value)
|
|
||||||
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
|
|
||||||
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
|
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// returns non-zero value on success: queue size before removal
|
// returns non-zero value on success: queue size before removal
|
||||||
uint try_pop(u32& out)
|
uint try_pop(u32& out)
|
||||||
{
|
{
|
||||||
|
@ -422,7 +416,7 @@ struct spu_channel_4_t
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns [previous count, value] (if aborted 0 count is returned)
|
// Returns [previous count, value] (if aborted 0 count is returned)
|
||||||
std::pair<u32, u32> pop_wait(cpu_thread& spu);
|
std::pair<u32, u32> pop_wait(cpu_thread& spu, bool pop_value = true);
|
||||||
|
|
||||||
// returns current queue size without modification
|
// returns current queue size without modification
|
||||||
uint try_read(u32 (&out)[4]) const
|
uint try_read(u32 (&out)[4]) const
|
||||||
|
@ -443,7 +437,7 @@ struct spu_channel_4_t
|
||||||
|
|
||||||
u32 get_count() const
|
u32 get_count() const
|
||||||
{
|
{
|
||||||
return std::as_const(values).raw().count;
|
return atomic_storage<u8>::load(std::as_const(values).raw().count);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_values(u32 count, u32 value0, u32 value1 = 0, u32 value2 = 0, u32 value3 = 0)
|
void set_values(u32 count, u32 value0, u32 value1 = 0, u32 value2 = 0, u32 value3 = 0)
|
||||||
|
|
|
@ -1749,9 +1749,23 @@ error_code sys_spu_thread_write_spu_mb(ppu_thread& ppu, u32 id, u32 value)
|
||||||
return CELL_ESRCH;
|
return CELL_ESRCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spu_channel_op_state state{};
|
||||||
|
{
|
||||||
std::lock_guard lock(group->mutex);
|
std::lock_guard lock(group->mutex);
|
||||||
|
|
||||||
thread->ch_in_mbox.push(value);
|
state = thread->ch_in_mbox.push(value, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!state.op_done)
|
||||||
|
{
|
||||||
|
ppu.state += cpu_flag::again;
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state.notify)
|
||||||
|
{
|
||||||
|
thread->ch_in_mbox.notify();
|
||||||
|
}
|
||||||
|
|
||||||
return CELL_OK;
|
return CELL_OK;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue