TSX: Improve cpu_thread::suspend_all implementation

Implement low_lock and vip_lock (for shared_mutex).
Try to simplify suspend_all implementation with updated shared_mutex.
Nekotekina 2019-07-23 00:37:35 +03:00
parent 49aefc0795
commit 090c71aa7c
4 changed files with 226 additions and 52 deletions
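
A note on the design before the diffs: the new lock modes pack three counters into the single 32-bit fixed-point value that shared_mutex already uses. The following sketch of the layout is implied by the constants added in this commit (c_vip = 1u << 7, c_one = 1u << 14); the field names are illustrative, not from the source:

// m_value layout implied by c_vip/c_one/c_sig/c_err (names are illustrative):
//
//   bits  0..6  : "low" reader count (step 1,     capped below c_vip - 1)
//   bits  7..13 : "vip" reader count (step c_vip, taken only while low == 0)
//   bits 14..29 : writer count       (step c_one)
//   bit  30     : c_sig              (signal bit used by imp_wait/imp_signal)
//   bit  31     : c_err              (error/overflow marker)
//
// try_lock_low() requires value < c_vip - 1, so it succeeds only while no
// vip reader and no writer is registered; try_lock_vip() additionally
// requires value % c_vip == 0, i.e. a zero low count. The two shared modes
// therefore exclude each other while each still excludes the writer.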


@@ -42,6 +42,84 @@ void shared_mutex::imp_unlock_shared(u32 old)
 	}
 }
 
+void shared_mutex::imp_lock_low(u32 val)
+{
+	verify("shared_mutex underflow" HERE), val < c_err;
+
+	for (int i = 0; i < 10; i++)
+	{
+		busy_wait();
+
+		if (try_lock_low())
+		{
+			return;
+		}
+	}
+
+	// Acquire writer lock and downgrade
+	const u32 old = m_value.fetch_add(c_one);
+
+	if (old == 0)
+	{
+		lock_downgrade();
+		return;
+	}
+
+	verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
+
+	imp_wait();
+	lock_downgrade();
+}
+
+void shared_mutex::imp_unlock_low(u32 old)
+{
+	verify("shared_mutex underflow" HERE), old - 1 < c_err;
+
+	// Check reader count, notify the writer if necessary
+	if ((old - 1) % c_vip == 0)
+	{
+		imp_signal();
+	}
+}
+
+void shared_mutex::imp_lock_vip(u32 val)
+{
+	verify("shared_mutex underflow" HERE), val < c_err;
+
+	for (int i = 0; i < 10; i++)
+	{
+		busy_wait();
+
+		if (try_lock_vip())
+		{
+			return;
+		}
+	}
+
+	// Acquire writer lock and downgrade
+	const u32 old = m_value.fetch_add(c_one);
+
+	if (old == 0)
+	{
+		lock_downgrade_to_vip();
+		return;
+	}
+
+	verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
+
+	imp_wait();
+	lock_downgrade_to_vip();
+}
+
+void shared_mutex::imp_unlock_vip(u32 old)
+{
+	verify("shared_mutex underflow" HERE), old - 1 < c_err;
+
+	// Check reader count, notify the writer if necessary
+	if ((old - 1) % c_one / c_vip == 0)
+	{
+		imp_signal();
+	}
+}
+
 void shared_mutex::imp_wait()
 {
 	while (!balanced_wait_until(m_value, -1, [&](u32& value, auto...)
@@ -158,3 +236,18 @@ void shared_mutex::imp_lock_unlock()
 	imp_wait();
 	unlock();
 }
+
+bool shared_mutex::downgrade_unique_vip_lock_to_low_or_unlock()
+{
+	return m_value.atomic_op([](u32& value)
+	{
+		if (value % c_one / c_vip == 1)
+		{
+			value -= c_vip - 1;
+			return true;
+		}
+
+		value -= c_vip;
+		return false;
+	});
+}
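
downgrade_unique_vip_lock_to_low_or_unlock() is the linchpin of the new suspend_all cleanup: the last vip holder converts itself into a low holder in one atomic step, while every other holder simply releases. A worked example of the two branches, using the constants above (the concrete values are editorial):

// Case 1: the calling thread is the only vip holder (vip == 1, low == 0).
//   value == c_vip (0x80);  value % c_one / c_vip == 1
//   -> value -= c_vip - 1, leaving value == 1: exactly one *low* reader.
//   Returns true: the caller now holds a low lock and can clear
//   cpu_flag::pause while new vip lockers remain blocked out.
//
// Case 2: other vip holders remain (e.g. vip == 2).
//   value == 2 * c_vip (0x100);  value % c_one / c_vip == 2
//   -> value -= c_vip, leaving one vip reader behind.
//   Returns false: the caller has merely released its vip lock.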


@@ -12,12 +12,17 @@ class shared_mutex final
 		c_one = 1u << 14, // Fixed-point 1.0 value (one writer, max_readers = c_one - 1)
 		c_sig = 1u << 30,
 		c_err = 1u << 31,
+		c_vip = 1u << 7,
 	};
 
 	atomic_t<u32> m_value{};
 
 	void imp_lock_shared(u32 val);
 	void imp_unlock_shared(u32 old);
+	void imp_lock_low(u32 val);
+	void imp_unlock_low(u32 old);
+	void imp_lock_vip(u32 val);
+	void imp_unlock_vip(u32 old);
 	void imp_wait();
 	void imp_signal();
 	void imp_lock(u32 val);
@@ -83,6 +88,64 @@ public:
 		}
 	}
 
+	bool try_lock_low()
+	{
+		const u32 value = m_value.load();
+
+		// Conditional increment
+		return value < c_vip - 1 && m_value.compare_and_swap_test(value, value + 1);
+	}
+
+	void lock_low()
+	{
+		const u32 value = m_value.load();
+
+		if (UNLIKELY(value >= c_vip - 1 || !m_value.compare_and_swap_test(value, value + 1)))
+		{
+			imp_lock_low(value);
+		}
+	}
+
+	void unlock_low()
+	{
+		// Unconditional decrement (can result in broken state)
+		const u32 value = m_value.fetch_sub(1);
+
+		if (UNLIKELY(value >= c_one))
+		{
+			imp_unlock_low(value);
+		}
+	}
+
+	bool try_lock_vip()
+	{
+		const u32 value = m_value.load();
+
+		// Conditional increment
+		return value < c_one - 1 && (value % c_vip) == 0 && m_value.compare_and_swap_test(value, value + c_vip);
+	}
+
+	void lock_vip()
+	{
+		const u32 value = m_value.load();
+
+		if (UNLIKELY(value >= c_one - 1 || (value % c_vip) || !m_value.compare_and_swap_test(value, value + c_vip)))
+		{
+			imp_lock_vip(value);
+		}
+	}
+
+	void unlock_vip()
+	{
+		// Unconditional decrement (can result in broken state)
+		const u32 value = m_value.fetch_sub(c_vip);
+
+		if (UNLIKELY(value >= c_one))
+		{
+			imp_unlock_vip(value);
+		}
+	}
+
 	bool try_lock()
 	{
 		return m_value.compare_and_swap_test(0, c_one);
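
Each primitive above follows the same optimistic pattern: a single plain load predicts whether one compare-and-swap can succeed, and any failure funnels into the out-of-line imp_lock_* slow path. A sketch of the slow path's escalation strategy, summarizing imp_lock_low/imp_lock_vip from the first file:

// 1) Spin briefly: up to 10 busy_wait() iterations retrying try_lock_*().
// 2) Escalate: fetch_add(c_one) registers the thread as a full *writer*,
//    which fences out new readers of both flavors and guarantees progress.
// 3) Once the writer slot is owned (immediately if old == 0, otherwise
//    after imp_wait()), lock_downgrade()/lock_downgrade_to_vip() converts
//    the exclusive hold back into the single shared hold that was wanted.
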
@@ -151,6 +214,12 @@ public:
 		m_value -= c_one - 1;
 	}
 
+	void lock_downgrade_to_vip()
+	{
+		// Convert to vip lock (can result in broken state)
+		m_value -= c_one - c_vip;
+	}
+
 	// Optimized wait for lockability without locking, relaxed
 	void lock_unlock()
 	{
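
The downgrade is plain fixed-point arithmetic; a worked example, assuming (as the function requires) that the writer lock is held:

// before: value == c_one                               (one writer: this thread)
// after:  value == c_one - (c_one - c_vip) == c_vip    (one vip reader)
//
// As with lock_downgrade(), calling this without holding the writer lock
// corrupts the counter, hence the "broken state" warning in the comment.
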
@@ -171,9 +240,12 @@ public:
 	{
 		return m_value.load() < c_one - 1;
 	}
+
+	// Special purpose logic
+	bool downgrade_unique_vip_lock_to_low_or_unlock();
 };
 
-// Simplified shared (reader) lock implementation.
+// Simplified shared (reader) lock implementation. Mutually incompatible with low_lock and vip_lock.
 class reader_lock final
 {
 	shared_mutex& m_mutex;
@@ -211,3 +283,47 @@ public:
 		m_upgraded ? m_mutex.unlock() : m_mutex.unlock_shared();
 	}
 };
+
+// Special shared (reader) lock, mutually exclusive with vip locks. Mutually incompatible with normal shared (reader) lock.
+class low_lock final
+{
+	shared_mutex& m_mutex;
+
+public:
+	low_lock(const low_lock&) = delete;
+
+	low_lock& operator=(const low_lock&) = delete;
+
+	explicit low_lock(shared_mutex& mutex)
+		: m_mutex(mutex)
+	{
+		m_mutex.lock_low();
+	}
+
+	~low_lock()
+	{
+		m_mutex.unlock_low();
+	}
+};
+
+// Special shared (reader) lock, mutually exclusive with low locks. Mutually incompatible with normal shared (reader) lock.
+class vip_lock final
+{
+	shared_mutex& m_mutex;
+
+public:
+	vip_lock(const vip_lock&) = delete;
+
+	vip_lock& operator=(const vip_lock&) = delete;
+
+	explicit vip_lock(shared_mutex& mutex)
+		: m_mutex(mutex)
+	{
+		m_mutex.lock_vip();
+	}
+
+	~vip_lock()
+	{
+		m_mutex.unlock_vip();
+	}
+};
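
For illustration, a minimal usage sketch of the two guards; the mutex instance and both functions are hypothetical, not from this commit:

shared_mutex g_state_lock;

void frequent_reader()
{
	low_lock lock(g_state_lock);  // shared with other low holders
	// ... read shared structures; excludes vip holders and writers ...
}

void rare_coordinator()
{
	vip_lock lock(g_state_lock);  // shared with other vip holders
	// ... runs only while no low holder is inside ...
}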


@@ -46,14 +46,8 @@ void fmt_class_string<bs_t<cpu_flag>>::format(std::string& out, u64 arg)
 
 thread_local cpu_thread* g_tls_current_cpu_thread = nullptr;
 
-// For coordination and notification
-alignas(64) shared_cond g_cpu_array_lock;
-
-// For cpu_flag::pause bit setting/removing
-alignas(64) shared_mutex g_cpu_pause_lock;
-
-// For cpu_flag::pause
-alignas(64) atomic_t<u64> g_cpu_pause_ctr{0};
+// For synchronizing suspend_all operation
+alignas(64) shared_mutex g_cpu_suspend_lock;
 
 // Semaphore for global thread array (global counter)
 alignas(64) atomic_t<u32> g_cpu_array_sema{0};
@@ -135,7 +129,7 @@ void cpu_thread::operator()()
 	verify("g_cpu_array[...] -> this" HERE), g_cpu_array[array_slot].exchange(this) == nullptr;
 
 	state += cpu_flag::wait;
-	g_cpu_array_lock.wait_all();
+	g_cpu_suspend_lock.lock_unlock();
 
 	// Check thread status
 	while (!(state & (cpu_flag::exit + cpu_flag::dbg_global_stop)))
@@ -171,7 +165,7 @@ void cpu_thread::operator()()
 	verify("g_cpu_array[...] -> null" HERE), g_cpu_array[array_slot].exchange(nullptr) == this;
 	g_cpu_array_bits[array_slot / 64] &= ~(1ull << (array_slot % 64));
 	g_cpu_array_sema--;
-	g_cpu_array_lock.wait_all();
+	g_cpu_suspend_lock.lock_unlock();
 }
 
 void cpu_thread::on_abort()
@@ -294,7 +288,7 @@ bool cpu_thread::check_state() noexcept
 		else
 		{
 			// If only cpu_flag::pause was set, notification won't arrive
-			g_cpu_array_lock.wait_all();
+			g_cpu_suspend_lock.lock_unlock();
 		}
 	}
 
@@ -336,25 +330,14 @@ std::string cpu_thread::dump() const
 }
 
 cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
-	: m_lock(g_cpu_array_lock.try_shared_lock())
-	, m_this(_this)
+	: m_this(_this)
 {
-	// TODO
-	if (!m_lock)
-	{
-		LOG_FATAL(GENERAL, "g_cpu_array_lock: too many concurrent accesses");
-		Emu.Pause();
-		return;
-	}
-
 	if (m_this)
 	{
 		m_this->state += cpu_flag::wait;
 	}
 
-	g_cpu_pause_ctr++;
-	reader_lock lock(g_cpu_pause_lock);
+	g_cpu_suspend_lock.lock_vip();
 
 	for_all_cpu([](cpu_thread* cpu)
 	{
@@ -387,33 +370,18 @@ cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
 
 cpu_thread::suspend_all::~suspend_all()
 {
 	// Make sure the latest thread does the cleanup and notifies others
-	u64 pause_ctr = 0;
-
-	while ((pause_ctr = g_cpu_pause_ctr), !g_cpu_array_lock.wait_all(m_lock))
-	{
-		if (pause_ctr)
-		{
-			std::lock_guard lock(g_cpu_pause_lock);
-
-			// Detect possible unfortunate reordering of flag clearing after suspend_all's reader lock
-			if (g_cpu_pause_ctr != pause_ctr)
-			{
-				continue;
-			}
-
-			for_all_cpu([&](cpu_thread* cpu)
-			{
-				if (g_cpu_pause_ctr == pause_ctr)
-				{
-					cpu->state -= cpu_flag::pause;
-				}
-			});
-		}
-
-		if (g_cpu_array_lock.notify_all(m_lock))
-		{
-			break;
-		}
+	if (g_cpu_suspend_lock.downgrade_unique_vip_lock_to_low_or_unlock())
+	{
+		for_all_cpu([&](cpu_thread* cpu)
+		{
+			cpu->state -= cpu_flag::pause;
+		});
+
+		g_cpu_suspend_lock.unlock_low();
+	}
+	else
+	{
+		g_cpu_suspend_lock.lock_unlock();
 	}
 
 	if (m_this)
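
Taken together, the suspend protocol now rests entirely on g_cpu_suspend_lock; a summary sketch of the resulting handshake:

// suspend_all ctor:  lock_vip()    - shared among concurrent suspenders; then
//                                    cpu_flag::pause is set on all cpu threads
// paused cpu thread: lock_unlock() - waits until the mutex is fully lockable,
//                                    i.e. until every vip/low hold is gone
// suspend_all dtor:  the *last* vip holder atomically downgrades to a low
//                    lock, clears cpu_flag::pause under it, then unlock_low();
//                    earlier holders just release and re-wait via lock_unlock().
//
// Downgrading to low instead of unlocking keeps a new suspend_all (a vip
// locker) from setting pause flags while they are being cleared, which is
// the race the old g_cpu_pause_ctr re-check loop was defending against.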


@@ -2,7 +2,6 @@
 
 #include "../Utilities/Thread.h"
 #include "../Utilities/bit_set.h"
-#include "../Utilities/cond.h"
 
 // Thread state flags
 enum class cpu_flag : u32
@@ -106,8 +105,6 @@ public:
 	// Thread locker
 	class suspend_all
 	{
-		decltype(std::declval<shared_cond&>().try_shared_lock()) m_lock;
-
 		cpu_thread* m_this;
 
 	public:
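
With the shared_cond handle gone, suspend_all reduces to a plain RAII pause of every cpu thread. A hypothetical call site for illustration (the function name and body are not from this commit):

void patch_code_under_execution(cpu_thread* current)
{
	// Constructor pauses all cpu threads; destructor clears cpu_flag::pause
	// and lets them resume. 'current' may be nullptr for non-cpu callers.
	cpu_thread::suspend_all pause(current);

	// ... safely mutate state that other cpu threads may be executing ...
}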