SPU: Operating system LR memory signals

Elad 2024-12-25 11:35:03 +02:00
parent e135012acd
commit a8bfa6a16c
5 changed files with 366 additions and 36 deletions

@@ -341,6 +341,21 @@ extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src)
#endif
}
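// Hash 128 bytes of reservation data: the wrapping sum of its thirty-two u32 words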
#ifdef _MSC_VER
__forceinline
#endif
extern u32 compute_rdata_hash32(const spu_rdata_t& _src)
{
const auto rhs = reinterpret_cast<const v128*>(_src);
const v128 a = gv_add32(rhs[0], rhs[1]);
const v128 c = gv_add32(rhs[4], rhs[5]);
const v128 b = gv_add32(rhs[2], rhs[3]);
const v128 d = gv_add32(rhs[6], rhs[7]);
const v128 r = gv_add32(gv_add32(a, b), gv_add32(c, d));
const v128 r1 = gv_add32(r, gv_shuffle32<1, 0, 3, 2>(r));
return r1._u32[0] + r1._u32[2];
}
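For reference, the vector reduction above amounts to a wrapping sum of the line's thirty-two u32 words; a standalone scalar sketch (hypothetical helper, not part of this commit):

#include <cstdint>
#include <cstring>

// Scalar equivalent of compute_rdata_hash32: wrapping sum of all u32 words
static std::uint32_t rdata_hash32_ref(const unsigned char (&line)[128])
{
    std::uint32_t sum = 0;

    for (int i = 0; i < 128; i += 4)
    {
        std::uint32_t w;
        std::memcpy(&w, line + i, sizeof(w)); // one native-endian word
        sum += w; // wraps mod 2^32, like the gv_add32 lane adds and final fold
    }

    return sum;
}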
#if defined(ARCH_X64)
static FORCE_INLINE void mov_rdata_nt_avx(__m256i* dst, const __m256i* src)
{
@@ -4718,6 +4733,12 @@ bool spu_thread::process_mfc_cmd()
busy_wait(300);
}
if (getllar_spin_count == 3)
{
// Check other reservations in other threads
lv2_obj::notify_all();
}
// Reset perf
perf0.restart();
}
@@ -4729,12 +4750,17 @@ bool spu_thread::process_mfc_cmd()
// Spinning, might as well yield cpu resources
state += cpu_flag::wait;
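// Index into g_spu_waiters_by_value; umax means "not registered"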
usz cache_line_waiter_index = umax;
if (auto wait_var = vm::reservation_notifier_begin_wait(addr, rtime))
{
cache_line_waiter_index = register_cache_line_waiter(addr);
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{100'000});
vm::reservation_notifier_end_wait(*wait_var);
}
deregister_cache_line_waiter(cache_line_waiter_index);
static_cast<void>(test_stopped());
// Quick check if there were reservation changes
@@ -5372,6 +5398,140 @@ bool spu_thread::reservation_check(u32 addr, const decltype(rdata)& data) const
return !res;
}
bool spu_thread::reservation_check(u32 addr, u32 hash, atomic_t<u64, 64>* range_lock)
{
if ((addr >> 28) < 2 || (addr >> 28) == 0xd)
{
// Always-allocated memory does not need strict checking (vm::main or vm::stack)
return compute_rdata_hash32(*vm::get_super_ptr<decltype(rdata)>(addr)) != hash;
}
// Ensure data is allocated (HACK: would raise LR event if not)
// Set range_lock first optimistically
range_lock->store(u64{128} << 32 | addr);
u64 lock_val = *std::prev(std::end(vm::g_range_lock_set));
u64 old_lock = 0;
while (lock_val != old_lock)
{
// Since we want to read data, let's check readability first
if (!(lock_val & vm::range_readable))
{
// Only one abnormal operation is "unreadable"
if ((lock_val >> vm::range_pos) == (vm::range_locked >> vm::range_pos))
{
// All page flags are untouched and can be read safely
if (!vm::check_addr(addr))
{
// Assume our memory is being (de)allocated
range_lock->release(0);
break;
}
// g_shmem values are unchanged too
const u64 is_shmem = vm::g_shmem[addr >> 16];
const u64 test_addr = is_shmem ? (is_shmem | static_cast<u16>(addr)) / 128 : u64{addr} / 128;
const u64 lock_addr = lock_val / 128;
if (test_addr == lock_addr)
{
// Our reservation is locked
range_lock->release(0);
break;
}
break;
}
}
// Fallback to normal range check
const u64 lock_addr = static_cast<u32>(lock_val);
const u32 lock_size = static_cast<u32>(lock_val << 3 >> 35);
if (lock_addr + lock_size <= addr || lock_addr >= addr + 128)
{
// We are outside locked range, so page flags are unaffected
if (!vm::check_addr(addr))
{
range_lock->release(0);
break;
}
}
else if (!(lock_val & vm::range_readable))
{
range_lock->release(0);
break;
}
old_lock = std::exchange(lock_val, *std::prev(std::end(vm::g_range_lock_set)));
}
if (!range_lock->load()) [[unlikely]]
{
return true;
}
const bool res = compute_rdata_hash32(*vm::get_super_ptr<decltype(rdata)>(addr)) == hash;
range_lock->release(0);
return !res;
}
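The range-lock word stored at the top of this function and decoded in the fallback path packs begin address, size, and flag bits into one u64. A self-checking sketch of that layout (standalone; the helper names are illustrative, not part of this commit):

#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Bits [31:0] = begin address, bits [60:32] = size, bits [63:61] = flag bits
constexpr u64 encode_range(u32 addr, u32 size) { return u64{size} << 32 | addr; }
constexpr u32 range_begin(u64 lock_val) { return static_cast<u32>(lock_val); }
constexpr u32 range_size(u64 lock_val) { return static_cast<u32>(lock_val << 3 >> 35); }

// A 128-byte reservation lock, as in "range_lock->store(u64{128} << 32 | addr)"
static_assert(range_begin(encode_range(0x3000'0080, 128)) == 0x3000'0080);
static_assert(range_size(encode_range(0x3000'0080, 128)) == 128);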
usz spu_thread::register_cache_line_waiter(u32 addr)
{
const u64 value = u64{compute_rdata_hash32(rdata)} << 32 | addr;
for (usz i = 0; i < std::size(g_spu_waiters_by_value); i++)
{
auto [old, ok] = g_spu_waiters_by_value[i].fetch_op([value](u64& x)
{
if (x == 0)
{
x = value + 1;
return true;
}
if ((x & -128) == value)
{
x++;
return true;
}
return false;
});
if (ok)
{
return i;
}
}
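// All slots busy: fall back to unregistered waiting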
return umax;
}
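Each slot of g_spu_waiters_by_value packs identity and a waiter count into one u64: the rdata hash in the high half, the reservation address in the low half, and the count in the address's otherwise-zero low 7 bits (reservations are 128-byte aligned). A standalone sketch with illustrative values:

#include <cassert>
#include <cstdint>

int main()
{
    using u64 = std::uint64_t;

    const u64 hash = 0xdeadbeef;          // stands in for compute_rdata_hash32(rdata)
    const u64 raddr = 0x3000'0080;        // 128-byte aligned, so bits [6:0] are free
    const u64 value = hash << 32 | raddr; // slot identity with a count of zero

    u64 slot = value + 1; // first waiter registers
    slot++;               // second waiter on the same line

    assert((slot & ~u64{127}) == value); // identity intact (the "x & -128" test)
    assert((slot & 127) == 2);           // two waiters counted
}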
void spu_thread::deregister_cache_line_waiter(usz index)
{
if (index == umax)
{
return;
}
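// Drop one waiter reference; the last waiter to leave clears the whole slot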
g_spu_waiters_by_value[index].fetch_op([](u64& x)
{
x--;
if ((x & 127) == 0)
{
x = 0;
}
return true; // fetch_op cancels the update when the functor returns false
});
}
std::pair<u32, u32> spu_thread::read_dec() const
{
const u64 res = ch_dec_value - (is_dec_frozen ? 0 : (get_timebased_time() - ch_dec_start_timestamp));
@@ -5739,6 +5899,24 @@ s64 spu_thread::get_ch_value(u32 ch)
#else
const bool reservation_busy_waiting = (seed + ((raddr == spurs_addr) ? 50u : 0u)) < g_cfg.core.spu_reservation_busy_waiting_percentage;
#endif
usz cache_line_waiter_index = umax;
auto check_cache_line_waiter = [&]()
{
if (cache_line_waiter_index == umax)
{
return true;
}
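// Identity bits cleared elsewhere (presumably by a writer): drop our registration and skip the timed wait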
if ((g_spu_waiters_by_value[cache_line_waiter_index] & -128) == 0)
{
deregister_cache_line_waiter(cache_line_waiter_index);
cache_line_waiter_index = umax;
return false;
}
return true;
};
for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true))
{
@@ -5746,12 +5924,22 @@ s64 spu_thread::get_ch_value(u32 ch)
if (is_stopped(old))
{
if (cache_line_waiter_index != umax)
{
g_spu_waiters_by_value[cache_line_waiter_index].release(0);
}
return -1;
}
// Optimized check
if (raddr)
if (raddr && mask1 & SPU_EVENT_LR)
{
if (cache_line_waiter_index == umax)
{
cache_line_waiter_index = register_cache_line_waiter(raddr);
}
bool set_lr = false;
if (!vm::check_addr(raddr) || rtime != vm::reservation_acquire(raddr))
@@ -5819,13 +6007,20 @@ s64 spu_thread::get_ch_value(u32 ch)
}
}
// Check other reservations in other threads
lv2_obj::notify_all();
if (raddr - spurs_addr <= 0x80 && !g_cfg.core.spu_accurate_reservations && mask1 == SPU_EVENT_LR)
{
// Wait with an extended timeout: in this situation nearly all writes send notifications, which makes the longer wait viable
// Abort notifications are handled specially for performance reasons
if (auto wait_var = vm::reservation_notifier_begin_wait(raddr, rtime))
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{300'000});
if (check_cache_line_waiter())
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{300'000});
}
vm::reservation_notifier_end_wait(*wait_var);
}
@@ -5834,9 +6029,14 @@ s64 spu_thread::get_ch_value(u32 ch)
const u32 _raddr = this->raddr;
#ifdef __linux__
if (auto wait_var = vm::reservation_notifier_begin_wait(_raddr, rtime))
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{50'000});
if (check_cache_line_waiter())
{
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{50'000});
}
vm::reservation_notifier_end_wait(*wait_var);
}
#else
@@ -5891,13 +6091,20 @@ s64 spu_thread::get_ch_value(u32 ch)
return false;
}
// Check other reservations in other threads
lv2_obj::notify_all();
return true;
};
if (auto wait_var = vm::reservation_notifier_begin_wait(_raddr, rtime))
{
atomic_wait_engine::set_one_time_use_wait_callback(wait_cb);
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{80'000});
if (check_cache_line_waiter())
{
atomic_wait_engine::set_one_time_use_wait_callback(wait_cb);
utils::bless<atomic_t<u32>>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{80'000});
}
vm::reservation_notifier_end_wait(*wait_var);
}
@@ -5918,6 +6125,8 @@ s64 spu_thread::get_ch_value(u32 ch)
thread_ctrl::wait_on(state, old, 100);
}
deregister_cache_line_waiter(cache_line_waiter_index);
wakeup_delay();
if (is_paused(state - cpu_flag::suspend))
@@ -6617,6 +6826,8 @@ bool spu_thread::stop_and_signal(u32 code)
}
}
lv2_obj::notify_all();
while (auto old = +state)
{
if (old & cpu_flag::signal && state.test_and_reset(cpu_flag::signal))
@@ -7185,6 +7396,8 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
}
}
lv2_obj::notify_all();
const u32 wait_on_val = static_cast<u32>(((pop ? bit_occupy : 0) | bit_wait) >> 32);
while (true)
@@ -7470,3 +7683,4 @@ void fmt_class_string<spu_channel_4_t>::format(std::string& out, u64 arg)
DECLARE(spu_thread::g_raw_spu_ctr){};
DECLARE(spu_thread::g_raw_spu_id){};
DECLARE(spu_thread::g_spu_work_count){};
DECLARE(spu_thread::g_spu_waiters_by_value){};