Rewrite cpu_thread::suspend_all

It is now a higher-order function that takes the workload as an argument.
Only one thread does the hard work of pausing the other threads.
Nekotekina 2020-10-09 20:33:12 +03:00
parent 6d83c9cc0e
commit 050c3e1d6b
10 changed files with 299 additions and 415 deletions
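
The change in a nutshell: the old scope-bound cpu_thread::suspend_all locker is replaced by a static higher-order template of the same name that accepts the operation as a callable, queues it, and has a single thread execute the whole queue while every cpu_thread is paused. A minimal before/after sketch of a call site, condensed from the ppu_store_reservation hunk below:

	// Before: RAII locker, every contender paused all threads itself
	{
		cpu_thread::suspend_all cpu_lock(&ppu);
		// ... work performed while all other threads are paused ...
	}

	// After: the work is a callable; it may run on a different thread,
	// and its result is marshalled back to the caller
	const bool ok = cpu_thread::suspend_all(&ppu, [&]
	{
		// ... work performed while all other threads are paused ...
		return true;
	});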

View file

@@ -201,7 +201,7 @@ asmjit::JitRuntime& asmjit::get_global_runtime()
 	return g_rt;
 }

-void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
+asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
 {
 	Label fall = c.newLabel();
 	Label begin = c.newLabel();
@@ -234,7 +234,10 @@ void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fall
 	c.jae(fallback);
 	c.align(kAlignCode, 16);
 	c.bind(begin);
-	c.xbegin(fall);
+	return fall;
+
+	// xbegin should be issued manually, which allows adding more checks before entering the transaction
+	//c.xbegin(fall);
 }

 void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code)

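Note the changed contract, visible at the call sites further down: build_transaction_enter now only emits the fallback/retry loop and returns the label bound at its head, and the caller issues xbegin itself. This leaves room for extra checks (such as the cpu_flag::pause test) between the retry loop and the transaction start. The typical call-site pattern, as it appears in the PPU/SPU hunks below:

	// Emit the abort-counting retry loop; the transaction is not started yet
	Label tx0 = build_transaction_enter(c, fall, x86::r12, 4);
	// Extra checks can be emitted here, before the transaction begins
	c.xbegin(tx0);
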
View file

@@ -56,7 +56,7 @@ namespace asmjit
 	asmjit::JitRuntime& get_global_runtime();

 	// Emit xbegin and adjacent loop, return label at xbegin
-	void build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);
+	[[nodiscard]] asmjit::Label build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);

 	// Emit xabort
 	void build_transaction_abort(X86Assembler& c, unsigned char code);

View file

@@ -39,84 +39,6 @@ void shared_mutex::imp_unlock_shared(u32 old)
 	}
 }

-void shared_mutex::imp_lock_low(u32 val)
-{
-	verify("shared_mutex underflow" HERE), val < c_err;
-
-	for (int i = 0; i < 10; i++)
-	{
-		busy_wait();
-
-		if (try_lock_low())
-		{
-			return;
-		}
-	}
-
-	// Acquire writer lock and downgrade
-	const u32 old = m_value.fetch_add(c_one);
-
-	if (old == 0)
-	{
-		lock_downgrade();
-		return;
-	}
-
-	verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
-	imp_wait();
-	lock_downgrade();
-}
-
-void shared_mutex::imp_unlock_low(u32 old)
-{
-	verify("shared_mutex underflow" HERE), old - 1 < c_err;
-
-	// Check reader count, notify the writer if necessary
-	if ((old - 1) % c_vip == 0)
-	{
-		imp_signal();
-	}
-}
-
-void shared_mutex::imp_lock_vip(u32 val)
-{
-	verify("shared_mutex underflow" HERE), val < c_err;
-
-	for (int i = 0; i < 10; i++)
-	{
-		busy_wait();
-
-		if (try_lock_vip())
-		{
-			return;
-		}
-	}
-
-	// Acquire writer lock and downgrade
-	const u32 old = m_value.fetch_add(c_one);
-
-	if (old == 0)
-	{
-		lock_downgrade_to_vip();
-		return;
-	}
-
-	verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
-	imp_wait();
-	lock_downgrade_to_vip();
-}
-
-void shared_mutex::imp_unlock_vip(u32 old)
-{
-	verify("shared_mutex underflow" HERE), old - 1 < c_err;
-
-	// Check reader count, notify the writer if necessary
-	if ((old - 1) % c_one / c_vip == 0)
-	{
-		imp_signal();
-	}
-}
-
 void shared_mutex::imp_wait()
 {
 	while (true)
@@ -241,18 +163,3 @@ void shared_mutex::imp_lock_unlock()
 	imp_wait();
 	unlock();
 }
-
-bool shared_mutex::downgrade_unique_vip_lock_to_low_or_unlock()
-{
-	return m_value.atomic_op([](u32& value)
-	{
-		if (value % c_one / c_vip == 1)
-		{
-			value -= c_vip - 1;
-			return true;
-		}
-
-		value -= c_vip;
-		return false;
-	});
-}

View file

@@ -12,17 +12,12 @@ class shared_mutex final
 		c_one = 1u << 14, // Fixed-point 1.0 value (one writer, max_readers = c_one - 1)
 		c_sig = 1u << 30,
 		c_err = 1u << 31,
-		c_vip = 1u << 7,
 	};

 	atomic_t<u32> m_value{};

 	void imp_lock_shared(u32 val);
 	void imp_unlock_shared(u32 old);
-	void imp_lock_low(u32 val);
-	void imp_unlock_low(u32 old);
-	void imp_lock_vip(u32 val);
-	void imp_unlock_vip(u32 old);
 	void imp_wait();
 	void imp_signal();
 	void imp_lock(u32 val);
@@ -88,64 +83,6 @@ public:
 		}
 	}

-	bool try_lock_low()
-	{
-		const u32 value = m_value.load();
-
-		// Conditional increment
-		return value < c_vip - 1 && m_value.compare_and_swap_test(value, value + 1);
-	}
-
-	void lock_low()
-	{
-		const u32 value = m_value.load();
-
-		if (value >= c_vip - 1 || !m_value.compare_and_swap_test(value, value + 1)) [[unlikely]]
-		{
-			imp_lock_low(value);
-		}
-	}
-
-	void unlock_low()
-	{
-		// Unconditional decrement (can result in broken state)
-		const u32 value = m_value.fetch_sub(1);
-
-		if (value >= c_one) [[unlikely]]
-		{
-			imp_unlock_low(value);
-		}
-	}
-
-	bool try_lock_vip()
-	{
-		const u32 value = m_value.load();
-
-		// Conditional increment
-		return (value < c_one - 1 || value & (c_one - c_vip)) && (value % c_vip) == 0 && m_value.compare_and_swap_test(value, value + c_vip);
-	}
-
-	void lock_vip()
-	{
-		const u32 value = m_value.load();
-
-		if ((value >= c_one - 1 && !(value & (c_one - c_vip))) || (value % c_vip) || !m_value.compare_and_swap_test(value, value + c_vip)) [[unlikely]]
-		{
-			imp_lock_vip(value);
-		}
-	}
-
-	void unlock_vip()
-	{
-		// Unconditional decrement (can result in broken state)
-		const u32 value = m_value.fetch_sub(c_vip);
-
-		if (value >= c_one) [[unlikely]]
-		{
-			imp_unlock_vip(value);
-		}
-	}
-
 	bool try_lock()
 	{
 		return m_value.compare_and_swap_test(0, c_one);
@@ -214,12 +151,6 @@ public:
 		m_value -= c_one - 1;
 	}

-	void lock_downgrade_to_vip()
-	{
-		// Convert to vip lock (can result in broken state)
-		m_value -= c_one - c_vip;
-	}
-
 	// Optimized wait for lockability without locking, relaxed
 	void lock_unlock()
 	{
@@ -240,12 +171,9 @@ public:
 	{
 		return m_value.load() < c_one - 1;
 	}
-
-	// Special purpose logic
-	bool downgrade_unique_vip_lock_to_low_or_unlock();
 };

-// Simplified shared (reader) lock implementation. Mutually incompatible with low_lock and vip_lock.
+// Simplified shared (reader) lock implementation.
 class reader_lock final
 {
 	shared_mutex& m_mutex;
@@ -283,47 +211,3 @@ public:
 		m_upgraded ? m_mutex.unlock() : m_mutex.unlock_shared();
 	}
 };
-
-// Special shared (reader) lock, mutually exclusive with vip locks. Mutually incompatible with normal shared (reader) lock.
-class low_lock final
-{
-	shared_mutex& m_mutex;
-
-public:
-	low_lock(const low_lock&) = delete;
-	low_lock& operator=(const low_lock&) = delete;
-
-	explicit low_lock(shared_mutex& mutex)
-		: m_mutex(mutex)
-	{
-		m_mutex.lock_low();
-	}
-
-	~low_lock()
-	{
-		m_mutex.unlock_low();
-	}
-};
-
-// Special shared (reader) lock, mutually exclusive with low locks. Mutually incompatible with normal shared (reader) lock.
-class vip_lock final
-{
-	shared_mutex& m_mutex;
-
-public:
-	vip_lock(const vip_lock&) = delete;
-	vip_lock& operator=(const vip_lock&) = delete;
-
-	explicit vip_lock(shared_mutex& mutex)
-		: m_mutex(mutex)
-	{
-		m_mutex.lock_vip();
-	}
-
-	~vip_lock()
-	{
-		m_mutex.unlock_vip();
-	}
-};

View file

@@ -15,6 +15,7 @@
 DECLARE(cpu_thread::g_threads_created){0};
 DECLARE(cpu_thread::g_threads_deleted){0};
+DECLARE(cpu_thread::g_suspend_counter){0};

 LOG_CHANNEL(profiler);
 LOG_CHANNEL(sys_log, "SYS");
@@ -245,6 +246,9 @@ struct cpu_counter
 	// For synchronizing suspend_all operation
 	alignas(64) shared_mutex cpu_suspend_lock;

+	// Workload linked list
+	alignas(64) atomic_t<cpu_thread::suspend_work*> cpu_suspend_work{};
+
 	// Semaphore for global thread array (global counter)
 	alignas(64) atomic_t<u32> cpu_array_sema{0};
@@ -306,7 +310,7 @@ struct cpu_counter
 };

 template <typename F>
-void for_all_cpu(F&& func) noexcept
+void for_all_cpu(F func) noexcept
 {
 	auto ctr = g_fxo->get<cpu_counter>();
@@ -475,6 +479,7 @@ bool cpu_thread::check_state() noexcept
 	bool cpu_sleep_called = false;
 	bool escape, retval;
+	u64 susp_ctr = -1;

 	while (true)
 	{
@@ -483,6 +488,16 @@ bool cpu_thread::check_state() noexcept
 		{
 			bool store = false;

+			// Easy way to obtain the suspend counter
+			if (flags & cpu_flag::pause && !(flags & cpu_flag::wait))
+			{
+				susp_ctr = g_suspend_counter;
+			}
+			else
+			{
+				susp_ctr = -1;
+			}
+
 			if (flags & cpu_flag::signal)
 			{
 				flags -= cpu_flag::signal;
@@ -559,8 +574,22 @@ bool cpu_thread::check_state() noexcept
 				continue;
 			}

-			// If only cpu_flag::pause was set, notification won't arrive
-			g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
+			// If only cpu_flag::pause was set, wait on the suspend counter instead
+			if (state0 & cpu_flag::pause)
+			{
+				// Hard way
+				if (susp_ctr == umax)
+				{
+					g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
+					continue;
+				}
+
+				// Wait for the current suspend_all operation to finish
+				while (busy_wait(), g_suspend_counter == susp_ctr)
+				{
+					g_suspend_counter.wait(susp_ctr);
+				}
+			}
 		}
 	}
 }
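
Paused threads now block on the global suspend counter rather than on the suspend lock: they sample the counter before parking and sleep until it is bumped. A self-contained sketch of the same wait/notify pattern using C++20 std::atomic (the codebase uses its own atomic_t, whose wait/notify_all are assumed here to behave like the standard ones; g_counter and the function names are illustrative):

	#include <atomic>

	std::atomic<unsigned long long> g_counter{0}; // stand-in for g_suspend_counter

	void wait_for_bump(unsigned long long seen)
	{
		// Sleep until the counter moves past the sampled value;
		// wait() may return spuriously, hence the loop
		while (g_counter.load() == seen)
		{
			g_counter.wait(seen);
		}
	}

	void bump_and_wake()
	{
		g_counter.fetch_add(1); // the equivalent of g_suspend_counter++ in push()
		g_counter.notify_all();
	}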
@@ -641,69 +670,114 @@ std::string cpu_thread::dump_misc() const
 	return fmt::format("Type: %s\n" "State: %s\n", typeid(*this).name(), state.load());
 }

-cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
-	: m_this(_this)
-{
-	if (m_this)
-	{
-		m_this->state += cpu_flag::wait;
-	}
-
-	g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_vip();
-
-	for_all_cpu([](cpu_thread* cpu)
-	{
-		// Should be atomic
-		if (!(cpu->state & cpu_flag::pause))
-		{
-			cpu->state += cpu_flag::pause;
-		}
-	});
-
-	busy_wait(500);
-
-	while (true)
-	{
-		bool ok = true;
-
-		for_all_cpu([&](cpu_thread* cpu)
-		{
-			if (!(cpu->state & cpu_flag::wait))
-			{
-				ok = false;
-			}
-		});
-
-		if (ok) [[likely]]
-		{
-			break;
-		}
-
-		busy_wait(500);
-	}
-}
-
-cpu_thread::suspend_all::~suspend_all()
-{
-	// Make sure the latest thread does the cleanup and notifies others
-	if (g_fxo->get<cpu_counter>()->cpu_suspend_lock.downgrade_unique_vip_lock_to_low_or_unlock())
-	{
-		for_all_cpu([&](cpu_thread* cpu)
-		{
-			cpu->state -= cpu_flag::pause;
-		});
-
-		g_fxo->get<cpu_counter>()->cpu_suspend_lock.unlock_low();
-	}
-	else
-	{
-		g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
-	}
-
-	if (m_this)
-	{
-		m_this->check_state();
-	}
-}
+void cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
+{
+	// Can't allow a pre-set wait bit (it'd be a problem)
+	verify(HERE), !_this || !(_this->state & cpu_flag::wait);
+
+	// Value must be reliable because cpu_flag::wait hasn't been observed yet (but not if pause is set)
+	const u64 susp_ctr = g_suspend_counter;
+
+	// Try to push the workload
+	auto& queue = g_fxo->get<cpu_counter>()->cpu_suspend_work;
+
+	do
+	{
+		// Load current head
+		next = queue.load();
+
+		if (!_this && next)
+		{
+			// If _this == nullptr, it only works if this is the first workload pushed
+			g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
+			continue;
+		}
+	}
+	while (!queue.compare_and_swap_test(next, this));
+
+	if (!next)
+	{
+		// The first thread to push work to the list pauses all threads and processes the whole queue
+		std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
+
+		for_all_cpu([&](cpu_thread* cpu)
+		{
+			if (!(cpu->state & cpu_flag::pause) && cpu != _this)
+			{
+				cpu->state += cpu_flag::pause;
+			}
+		});
+
+		busy_wait(500);
+
+		while (true)
+		{
+			bool ok = true;
+
+			for_all_cpu([&](cpu_thread* cpu)
+			{
+				if (!(cpu->state & cpu_flag::wait) && cpu != _this)
+				{
+					ok = false;
+				}
+			});
+
+			if (ok) [[likely]]
+			{
+				break;
+			}
+		}
+
+		// Extract the queue and reverse the element order (FILO to FIFO) (TODO: maybe leave order as is?)
+		auto* head = queue.exchange(nullptr);
+
+		if (auto* prev = head->next)
+		{
+			head->next = nullptr;
+
+			do
+			{
+				auto* pre2 = prev->next;
+				prev->next = head;
+				head = std::exchange(prev, pre2);
+			}
+			while (prev);
+		}
+
+		// Execute all stored workload
+		for (; head; head = head->next)
+		{
+			head->exec(head->func_ptr, head->res_buf);
+		}
+
+		// Finalization
+		g_suspend_counter++;
+
+		for_all_cpu([&](cpu_thread* cpu)
+		{
+			if (cpu != _this)
+			{
+				cpu->state -= cpu_flag::pause;
+			}
+		});
+	}
+	else
+	{
+		// Seems safe to set pause on self because the wait flag hasn't been observed yet
+		_this->state += cpu_flag::pause + cpu_flag::wait;
+
+		// Subscribe to the notification broadcast
+		while (busy_wait(), g_suspend_counter == susp_ctr)
+		{
+			g_suspend_counter.wait(susp_ctr);
+		}
+
+		_this->check_state();
+		return;
+	}
+
+	g_suspend_counter.notify_all();
+}

 void cpu_thread::stop_all() noexcept
@@ -716,7 +790,7 @@ void cpu_thread::stop_all() noexcept
 	}
 	else
 	{
-		::vip_lock lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
+		std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);

 		for_all_cpu([](cpu_thread* cpu)
 		{

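The workload queue is an intrusive lock-free stack, so the draining thread reverses it once to execute entries in push order. A self-contained sketch of the same pointer shuffle (Node and reverse are illustrative names; the real code operates on suspend_work::next):

	#include <cassert>
	#include <utility>

	struct Node { int id; Node* next; };

	// Reverse a singly-linked list, mirroring the loop in suspend_work::push
	Node* reverse(Node* head)
	{
		if (Node* prev = head ? head->next : nullptr)
		{
			head->next = nullptr;

			do
			{
				Node* pre2 = prev->next; // save the rest of the list
				prev->next = head;       // relink the current node to the new head
				head = std::exchange(prev, pre2);
			}
			while (prev);
		}

		return head;
	}

	int main()
	{
		Node c{3, nullptr}, b{2, &c}, a{1, &b}; // stack after pushing 3, 2, 1 (newest first)
		Node* head = reverse(&a);               // now 3 -> 2 -> 1: oldest entry runs first
		assert(head->id == 3 && head->next->id == 2 && head->next->next->id == 1);
	}
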
View file

@@ -88,7 +88,7 @@ private:
 public:
 	// Thread stats for external observation
-	static atomic_t<u64> g_threads_created, g_threads_deleted;
+	static atomic_t<u64> g_threads_created, g_threads_deleted, g_suspend_counter;

 	// Get thread name (as assigned to named_thread)
 	std::string get_name() const;
@@ -123,18 +123,50 @@ public:
 	// Callback for cpu_flag::ret
 	virtual void cpu_return() {}

-	// Thread locker
-	class suspend_all
+	// For internal use
+	struct suspend_work
 	{
-		cpu_thread* m_this;
+		void* func_ptr;
+		void* res_buf;

-	public:
-		suspend_all(cpu_thread* _this) noexcept;
-		suspend_all(const suspend_all&) = delete;
-		suspend_all& operator=(const suspend_all&) = delete;
-		~suspend_all();
+		// Type-erased op executor
+		void (*exec)(void* func, void* res);
+
+		// Next object in the linked list
+		suspend_work* next;
+
+		// Internal method
+		void push(cpu_thread* _this) noexcept;
 	};

+	// Suspend all threads and execute op (may be executed by a thread other than the caller!)
+	template <typename F>
+	static auto suspend_all(cpu_thread* _this, F op)
+	{
+		if constexpr (std::is_void_v<std::invoke_result_t<F>>)
+		{
+			suspend_work work{&op, nullptr, [](void* func, void*)
+			{
+				(*static_cast<F*>(func))();
+			}};
+
+			work.push(_this);
+			return;
+		}
+		else
+		{
+			std::invoke_result_t<F> result;
+
+			suspend_work work{&op, &result, [](void* func, void* res_buf)
+			{
+				*static_cast<std::invoke_result_t<F>*>(res_buf) = (*static_cast<F*>(func))();
+			}};
+
+			work.push(_this);
+			return result;
+		}
+	}
+
 	// Stop all threads with cpu_flag::dbg_global_stop
 	static void stop_all() noexcept;

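The template erases the callable's type through two void pointers plus a captureless lambda decayed to a plain function pointer, so suspend_work itself stays a plain aggregate node. A standalone sketch of that type-erasure trick under illustrative names (erased_call and run_erased are not from the codebase):

	#include <cstdio>
	#include <type_traits>

	// Minimal type-erased deferred call, mirroring the suspend_work layout
	struct erased_call
	{
		void* func_ptr;             // points at the original callable
		void* res_buf;              // points at result storage (or nullptr for void)
		void (*exec)(void*, void*); // captureless lambda decayed to a function pointer
	};

	template <typename F>
	auto run_erased(F op)
	{
		if constexpr (std::is_void_v<std::invoke_result_t<F>>)
		{
			erased_call c{&op, nullptr, [](void* f, void*) { (*static_cast<F*>(f))(); }};
			c.exec(c.func_ptr, c.res_buf);
			return;
		}
		else
		{
			std::invoke_result_t<F> result;

			erased_call c{&op, &result, [](void* f, void* r)
			{
				*static_cast<std::invoke_result_t<F>*>(r) = (*static_cast<F*>(f))();
			}};

			c.exec(c.func_ptr, c.res_buf);
			return result;
		}
	}

	int main()
	{
		std::printf("%d\n", run_erased([] { return 42; })); // prints 42
	}
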
View file

@@ -1275,7 +1275,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	}

 	// Begin transaction
-	build_transaction_enter(c, fall, x86::r12, 4);
+	Label tx0 = build_transaction_enter(c, fall, x86::r12, 4);
+	c.xbegin(tx0);
 	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(skip);
@@ -1336,7 +1337,6 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	c.bind(fall);
 	c.sar(x86::eax, 24);
 	c.js(fail);
-	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::wait));

 	// Touch memory if transaction failed without RETRY flag on the first attempt
 	c.cmp(x86::r12, 1);
@@ -1361,7 +1361,14 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	c.cmp(x86::rax, x86::r13);
 	c.jne(fail2);

-	build_transaction_enter(c, fall2, x86::r12, 666);
+	Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
+	c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
+	c.jc(fail3);
+	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
+	c.and_(x86::rax, -128);
+	c.cmp(x86::rax, x86::r13);
+	c.jne(fail2);
+	c.xbegin(tx1);

 	if (s_tsx_avx)
 	{
@@ -1535,30 +1542,18 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 	default: break;
 	}

-	cpu_thread::suspend_all cpu_lock(&ppu);
-
-	// Obtain unique lock
-	while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
-	{
-		busy_wait(100);
-
-		// Give up if reservation has been updated
-		if ((res & -128) != rtime)
-		{
-			res -= 1;
-			return false;
-		}
-	}
-
-	if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
-	{
-		data.release(reg_value);
-		res += 63;
-		return true;
-	}
-
-	res -= (vm::rsrv_unique_lock + 1);
-	return false;
+	return cpu_thread::suspend_all(&ppu, [&]
+	{
+		if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
+		{
+			data.release(reg_value);
+			res += 127;
+			return true;
+		}
+
+		res -= 1;
+		return false;
+	});
 }

 while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))

View file

@@ -376,7 +376,8 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
 	}

 	// Begin transaction
-	build_transaction_enter(c, fall, x86::r12, 4);
+	Label tx0 = build_transaction_enter(c, fall, x86::r12, 4);
+	c.xbegin(tx0);
 	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(skip);
@@ -450,7 +451,6 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
 	c.bind(fall);
 	c.sar(x86::eax, 24);
 	c.js(fail);
-	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::wait));

 	// Touch memory if transaction failed without RETRY flag on the first attempt
 	c.cmp(x86::r12, 1);
@@ -471,11 +471,14 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
 	c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(fail3);
+	c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
+	c.jc(fail3);
 	c.and_(x86::rax, -128);
 	c.cmp(x86::rax, x86::r13);
 	c.jne(fail2);

-	build_transaction_enter(c, fall2, x86::r12, 666);
+	Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
+	c.xbegin(tx1);

 	if (s_tsx_avx)
 	{
@@ -648,7 +651,8 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 	}

 	// Begin transaction
-	build_transaction_enter(c, fall, x86::r12, 8);
+	Label tx0 = build_transaction_enter(c, fall, x86::r12, 8);
+	c.xbegin(tx0);
 	c.test(x86::dword_ptr(x86::rbx), vm::rsrv_unique_lock);
 	c.jnz(skip);
@@ -683,7 +687,6 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 	//c.jmp(fall);

 	c.bind(fall);
-	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::wait));

 	// Touch memory if transaction failed without RETRY flag on the first attempt
 	c.cmp(x86::r12, 1);
@@ -703,7 +706,12 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(fall2);

-	build_transaction_enter(c, fall2, x86::r12, 666);
+	Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
+
+	// Check pause flag
+	c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
+	c.jc(fall2);
+	c.xbegin(tx1);

 	if (s_tsx_avx)
 	{
@@ -1848,38 +1856,26 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 	if (render) render->pause();

-	cpu_thread::suspend_all cpu_lock(this);
-
-	// Obtain unique lock
-	while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
-	{
-		busy_wait(100);
-
-		// Give up if reservation has been updated
-		if ((res & -128) != rtime)
-		{
-			res -= 1;
-			if (render) render->unpause();
-			return false;
-		}
-	}
-
-	if ((res & -128) == rtime)
-	{
-		auto& data = vm::_ref<spu_rdata_t>(addr);
-
-		if (cmp_rdata(rdata, data))
-		{
-			mov_rdata(data, to_write);
-			res += 63;
-			if (render) render->unpause();
-			return true;
-		}
-	}
-
-	res -= (vm::rsrv_unique_lock | 1);
+	const bool ok = cpu_thread::suspend_all(this, [&]()
+	{
+		if ((res & -128) == rtime)
+		{
+			auto& data = vm::_ref<spu_rdata_t>(addr);
+
+			if (cmp_rdata(rdata, data))
+			{
+				mov_rdata(data, to_write);
+				res += 127;
+				return true;
+			}
+		}
+
+		res -= 1;
+		return false;
+	});

 	if (render) render->unpause();
-	return false;
+	return ok;
 }
 case 1: return true;
 case 0: return false;
@@ -1973,15 +1969,11 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
 	if (result == 0)
 	{
-		cpu_thread::suspend_all cpu_lock(cpu);
-
-		while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
-		{
-			busy_wait(100);
-		}
-
-		mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
-		vm::reservation_acquire(addr, 128) += 63;
+		cpu_thread::suspend_all(cpu, [&]
+		{
+			mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
+			vm::reservation_acquire(addr, 128) += 127;
+		});
 	}

 	if (render) render->unpause();

View file

@@ -497,33 +497,25 @@ namespace vm
 	void reservation_op_internal(u32 addr, std::function<bool()> func)
 	{
-		const auto _cpu = get_current_cpu_thread();
-
-		// Acknowledge contender if necessary (TODO: check)
-		_cpu->state += cpu_flag::wait;
-
+		const bool ok = cpu_thread::suspend_all(get_current_cpu_thread(), [&]
 		{
-			cpu_thread::suspend_all cpu_lock(_cpu);
-
-			// Wait to acquire unique lock
-			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
-			{
-				busy_wait(100);
-			}
-
 			if (func())
 			{
 				// Success, release all locks if necessary
-				vm::reservation_acquire(addr, 128) += 63;
+				vm::reservation_acquire(addr, 128) += 127;
+				return true;
 			}
 			else
 			{
-				// Fake update (TODO)
-				vm::reservation_acquire(addr, 128) += 63;
+				vm::reservation_acquire(addr, 128) -= 1;
+				return false;
 			}
-		}
+		});

-		vm::reservation_notifier(addr, 128).notify_all();
+		if (ok)
+		{
+			vm::reservation_notifier(addr, 128).notify_all();
+		}
 	}

 	void reservation_escape_internal()

View file

@@ -319,35 +319,36 @@ std::vector<u32> cheat_engine::search(const T value, const std::vector<u32>& to_
 	if (Emu.IsStopped())
 		return {};

-	cpu_thread::suspend_all cpu_lock(nullptr);
-
-	if (!to_filter.empty())
-	{
-		for (const auto& off : to_filter)
-		{
-			if (vm::check_addr(off, sizeof(T)))
-			{
-				if (*vm::get_super_ptr<T>(off) == value_swapped)
-					results.push_back(off);
-			}
-		}
-	}
-	else
-	{
-		// Looks through mapped memory
-		for (u32 page_start = 0x10000; page_start < 0xF0000000; page_start += 4096)
-		{
-			if (vm::check_addr(page_start))
-			{
-				// Assumes the values are aligned
-				for (u32 index = 0; index < 4096; index += sizeof(T))
-				{
-					if (*vm::get_super_ptr<T>(page_start + index) == value_swapped)
-						results.push_back(page_start + index);
-				}
-			}
-		}
-	}
+	cpu_thread::suspend_all(nullptr, [&]
+	{
+		if (!to_filter.empty())
+		{
+			for (const auto& off : to_filter)
+			{
+				if (vm::check_addr(off, sizeof(T)))
+				{
+					if (*vm::get_super_ptr<T>(off) == value_swapped)
+						results.push_back(off);
+				}
+			}
+		}
+		else
+		{
+			// Looks through mapped memory
+			for (u32 page_start = 0x10000; page_start < 0xF0000000; page_start += 4096)
+			{
+				if (vm::check_addr(page_start))
+				{
+					// Assumes the values are aligned
+					for (u32 index = 0; index < 4096; index += sizeof(T))
+					{
+						if (*vm::get_super_ptr<T>(page_start + index) == value_swapped)
+							results.push_back(page_start + index);
+					}
+				}
+			}
+		}
+	});

 	return results;
 }
@@ -361,19 +362,17 @@ T cheat_engine::get_value(const u32 offset, bool& success)
 		return 0;
 	}

-	cpu_thread::suspend_all cpu_lock(nullptr);
-
-	if (!vm::check_addr(offset, sizeof(T)))
-	{
-		success = false;
-		return 0;
-	}
-
-	success = true;
-
-	T ret_value = *vm::get_super_ptr<T>(offset);
-	return ret_value;
+	return cpu_thread::suspend_all(nullptr, [&]() -> T
+	{
+		if (!vm::check_addr(offset, sizeof(T)))
+		{
+			success = false;
+			return 0;
+		}
+
+		success = true;
+		return *vm::get_super_ptr<T>(offset);
+	});
 }
template <typename T> template <typename T>
@ -382,55 +381,61 @@ bool cheat_engine::set_value(const u32 offset, const T value)
if (Emu.IsStopped()) if (Emu.IsStopped())
return false; return false;
cpu_thread::suspend_all cpu_lock(nullptr);
if (!vm::check_addr(offset, sizeof(T))) if (!vm::check_addr(offset, sizeof(T)))
{ {
return false; return false;
} }
*vm::get_super_ptr<T>(offset) = value; return cpu_thread::suspend_all(nullptr, [&]
const bool exec_code_at_start = vm::check_addr(offset, 1, vm::page_executable);
const bool exec_code_at_end = [&]()
{ {
if constexpr (sizeof(T) == 1) if (!vm::check_addr(offset, sizeof(T)))
{ {
return exec_code_at_start; return false;
}
else
{
return vm::check_addr(offset + sizeof(T) - 1, 1, vm::page_executable);
}
}();
if (exec_code_at_end || exec_code_at_start)
{
extern void ppu_register_function_at(u32, u32, ppu_function_t);
u32 addr = offset, size = sizeof(T);
if (exec_code_at_end && exec_code_at_start)
{
size = align<u32>(addr + size, 4) - (addr & -4);
addr &= -4;
}
else if (exec_code_at_end)
{
size -= align<u32>(size - 4096 + (addr & 4095), 4);
addr = align<u32>(addr, 4096);
}
else if (exec_code_at_start)
{
size = align<u32>(4096 - (addr & 4095), 4);
addr &= -4;
} }
// Reinitialize executable code *vm::get_super_ptr<T>(offset) = value;
ppu_register_function_at(addr, size, nullptr);
}
return true; const bool exec_code_at_start = vm::check_addr(offset, 1, vm::page_executable);
const bool exec_code_at_end = [&]()
{
if constexpr (sizeof(T) == 1)
{
return exec_code_at_start;
}
else
{
return vm::check_addr(offset + sizeof(T) - 1, 1, vm::page_executable);
}
}();
if (exec_code_at_end || exec_code_at_start)
{
extern void ppu_register_function_at(u32, u32, ppu_function_t);
u32 addr = offset, size = sizeof(T);
if (exec_code_at_end && exec_code_at_start)
{
size = align<u32>(addr + size, 4) - (addr & -4);
addr &= -4;
}
else if (exec_code_at_end)
{
size -= align<u32>(size - 4096 + (addr & 4095), 4);
addr = align<u32>(addr, 4096);
}
else if (exec_code_at_start)
{
size = align<u32>(4096 - (addr & 4095), 4);
addr &= -4;
}
// Reinitialize executable code
ppu_register_function_at(addr, size, nullptr);
}
return true;
});
} }
bool cheat_engine::is_addr_safe(const u32 offset) bool cheat_engine::is_addr_safe(const u32 offset)