atomic.hpp: use u128 as storage for masks/values

Nekotekina 2020-12-20 09:27:40 +03:00
parent 43c87e99b0
commit 5f618814f6
4 changed files with 57 additions and 190 deletions
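Editor's note: the change swaps every __m128i below for an integer u128 type. A minimal sketch of the assumption this rests on — on GCC/Clang targets a built-in 128-bit unsigned integer is available (the repository's actual alias may be defined elsewhere and differently; MSVC has no __int128 and would need an emulated type) — showing how the SSE intrinsic idioms become plain integer arithmetic:

#include <cstdint>

// Assumption for illustration only: u128 is the GCC/Clang built-in
// 128-bit unsigned integer.
using u128 = unsigned __int128;

int main()
{
    const u128 all_ones = u128(-1);            // replaces _mm_set1_epi64x(-1)
    const u128 low_only = UINT64_MAX;          // replaces _mm_cvtsi64_si128(-1)
    const u128 overlap  = all_ones & low_only; // replaces _mm_and_si128(a, b)
    return overlap == low_only ? 0 : 1;        // zero tests need no pack/extract
}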

View file

@@ -425,18 +425,6 @@ public:
 	}
 };
-
-namespace atomic_wait
-{
-	template <typename T>
-	inline __m128i default_mask<lf_queue<T>> = _mm_cvtsi64_si128(-1);
-
-	template <typename T>
-	constexpr __m128i get_value(lf_queue<T>&, std::nullptr_t value = nullptr)
-	{
-		return _mm_setzero_si128();
-	}
-}
 
 // Concurrent linked list, elements remain until destroyed.
 template <typename T>
 class lf_bunch final
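Editor's note: the hunk above drops lf_queue's opt-in to the wait engine's customization points. For orientation, a hedged sketch of the pattern itself, modeled on the stx::atomic_ptr specialization at the end of this diff (my_type and the primary template here are illustrative, not the repository's exact declarations):

#include <cstddef>
#include <cstdint>

using u128 = unsigned __int128; // same assumption as above

namespace atomic_wait
{
	// Illustrative primary template; the real one derives the mask from
	// the observed type's size (see the atomic.hpp hunk further down).
	template <typename X>
	constexpr u128 default_mask = u128(-1);
}

struct my_type {}; // hypothetical waitable type

namespace atomic_wait
{
	// Opt-in: waiters on my_type compare only the low 64 bits of storage.
	template <>
	constexpr u128 default_mask<my_type> = UINT64_MAX;

	// Opt-in: how a comparison value is encoded into the u128 payload.
	constexpr u128 get_value(my_type&, std::nullptr_t = nullptr)
	{
		return 0; // "wait while the object still observes as empty"
	}
}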

View file

@@ -42,11 +42,7 @@ static inline bool operator &(atomic_wait::op lhs, atomic_wait::op_flag rhs)
 }
 
 // Compare data in memory with old value, and return true if they are equal
-static NEVER_INLINE bool
-#ifdef _WIN32
-__vectorcall
-#endif
-ptr_cmp(const void* data, u32 _size, __m128i old128, __m128i mask128, atomic_wait::info* ext = nullptr)
+static NEVER_INLINE bool ptr_cmp(const void* data, u32 _size, u128 old128, u128 mask128, atomic_wait::info* ext = nullptr)
 {
 	using atomic_wait::op;
 	using atomic_wait::op_flag;
@@ -59,8 +55,8 @@ ptr_cmp(const void* data, u32 _size, __m128i old128, __m128i mask128, atomic_wait::info* ext = nullptr)
 	if (size <= 8)
 	{
 		u64 new_value = 0;
-		u64 old_value = _mm_cvtsi128_si64(old128);
-		u64 mask = _mm_cvtsi128_si64(mask128) & (UINT64_MAX >> ((64 - size * 8) & 63));
+		u64 old_value = static_cast<u64>(old128);
+		u64 mask = static_cast<u64>(mask128) & (UINT64_MAX >> ((64 - size * 8) & 63));
 
 		// Don't load memory on empty mask
 		switch (mask ? size : 0)
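Editor's note on the mask expression kept here: UINT64_MAX >> ((64 - size * 8) & 63) builds a mask covering the low `size` bytes, and the `& 63` clamps the shift count so that size == 8 yields all ones instead of the undefined 64-bit shift by 64. A small self-checking sketch:

#include <cstdint>

// Mask covering the low `size` bytes, valid for size in 1..8; `& 63`
// keeps the shift count in [0, 63], avoiding UB when size == 8.
constexpr uint64_t sized_mask(unsigned size)
{
    return UINT64_MAX >> ((64 - size * 8) & 63);
}

static_assert(sized_mask(1) == 0xff);
static_assert(sized_mask(4) == 0xffffffff);
static_assert(sized_mask(8) == UINT64_MAX);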
@@ -145,7 +141,7 @@ ptr_cmp(const void* data, u32 _size, __m128i old128, __m128i mask128, atomic_wait::info* ext = nullptr)
 	case op::pop:
 	{
 		// Count is taken from least significant byte and ignores some flags
-		const u64 count = _mm_cvtsi128_si64(old128) & 0xff;
+		const u64 count = static_cast<u64>(old128) & 0xff;
 
 		u64 bitc = new_value;
 		bitc = (bitc & 0xaaaaaaaaaaaaaaaa) / 2 + (bitc & 0x5555555555555555);
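Editor's note: the last line above is the first step of a SWAR population count — (x & 0xaa…) / 2 equals (x >> 1) & 0x55…, so adjacent bit pairs are summed in place; op::pop then compares the bit count against `count`. A sketch of the full classic reduction for reference:

#include <cstdint>

// Classic SWAR popcount; the diff shows step one of this sequence.
constexpr uint64_t popcount64(uint64_t x)
{
    x = (x & 0xaaaaaaaaaaaaaaaa) / 2 + (x & 0x5555555555555555);   // 2-bit sums
    x = (x & 0xcccccccccccccccc) / 4 + (x & 0x3333333333333333);   // 4-bit sums
    x = (x & 0xf0f0f0f0f0f0f0f0) / 16 + (x & 0x0f0f0f0f0f0f0f0f);  // byte sums
    return x * 0x0101010101010101 >> 56; // accumulate all bytes into the top byte
}

static_assert(popcount64(0) == 0);
static_assert(popcount64(0xff) == 8);
static_assert(popcount64(UINT64_MAX) == 64);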
@@ -210,23 +206,18 @@ ptr_cmp(const void* data, u32 _size, __m128i old128, __m128i mask128, atomic_wait::info* ext = nullptr)
 }
 
 // Returns true if mask overlaps, or the argument is invalid
-static bool
-#ifdef _WIN32
-__vectorcall
-#endif
-cmp_mask(u32 size1, __m128i mask1, __m128i val1, u32 size2, __m128i mask2, __m128i val2)
+static bool cmp_mask(u32 size1, u128 mask1, u128 val1, u32 size2, u128 mask2, u128 val2)
 {
 	// Compare only masks, new value is not available in this mode
 	if (size1 == umax)
 	{
 		// Simple mask overlap
-		const auto v0 = _mm_and_si128(mask1, mask2);
-		const auto v1 = _mm_packs_epi16(v0, v0);
-		return !!_mm_cvtsi128_si64(v1);
+		const u128 v0 = mask1 & mask2;
+		return !!(v0);
 	}
 
 	// Generate masked value inequality bits
-	const auto v0 = _mm_and_si128(_mm_and_si128(mask1, mask2), _mm_xor_si128(val1, val2));
+	const u128 v0 = (mask1 & mask2) & (val1 ^ val2);
 
 	using atomic_wait::op;
 	using atomic_wait::op_flag;
@@ -244,14 +235,14 @@ cmp_mask(u32 size1, __m128i mask1, __m128i val1, u32 size2, __m128i mask2, __m128i val2)
 		// Generate sized mask
 		const u64 mask = UINT64_MAX >> ((64 - size * 8) & 63);
 
-		if (!(_mm_cvtsi128_si64(v0) & mask))
+		if (!(static_cast<u64>(v0) & mask))
 		{
 			return !!(flag & op_flag::inverse);
 		}
 	}
 	else if (size == 16)
 	{
-		if (!_mm_cvtsi128_si64(_mm_packs_epi16(v0, v0)))
+		if (!v0)
 		{
 			return !!(flag & op_flag::inverse);
 		}
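Editor's note: this hunk shows the main payoff. Zero-testing an __m128i required a saturating pack plus a scalar extract, while a u128 compares directly — which is presumably also why the Windows-only __vectorcall annotations disappear throughout. Both forms of the overlap test, as a sketch:

#include <emmintrin.h>
#include <cstdint>

using u128 = unsigned __int128; // same assumption as above

// New form: any shared mask bit is a plain integer test.
inline bool new_overlap(u128 mask1, u128 mask2)
{
    return (mask1 & mask2) != 0;
}

// Old form: nonzero 16-bit lanes survive the signed-saturating pack, so
// testing the packed low 64 bits answers "is any lane of the AND nonzero?".
inline bool old_overlap(__m128i mask1, __m128i mask2)
{
    const __m128i v0 = _mm_and_si128(mask1, mask2);
    const __m128i v1 = _mm_packs_epi16(v0, v0);
    return !!_mm_cvtsi128_si64(v1);
}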
@@ -328,8 +319,8 @@ namespace
 		// Combined pointer (most significant 47 bits) and ref counter (17 least significant bits)
 		atomic_t<u64> ptr_ref;
 		u64 tid;
-		__m128i mask;
-		__m128i oldv;
+		u128 mask;
+		u128 oldv;
 
 		u64 tsc0;
 		u16 link;
@@ -367,8 +358,8 @@ namespace
 			size = 0;
 			flag = 0;
 			sync.release(0);
-			mask = _mm_setzero_si128();
-			oldv = _mm_setzero_si128();
+			mask = 0;
+			oldv = 0;
 
 #ifdef USE_STD
 			mtx.destroy();
@@ -557,11 +548,7 @@ namespace
 	// TLS storage for few allocated "semaphores" to allow skipping initialization
 	static thread_local tls_cond_handler s_tls_conds{};
 
-	static u32
-#ifdef _WIN32
-	__vectorcall
-#endif
-	cond_alloc(uptr iptr, __m128i mask, u32 tls_slot = -1)
+	static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1)
 	{
 		// Try to get cond from tls slot instead
 		u16* ptls = tls_slot >= std::size(s_tls_conds.cond) ? nullptr : s_tls_conds.cond + tls_slot;
@@ -672,7 +659,7 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
 		{
 			// Fast finalization
 			cond->sync.release(0);
-			cond->mask = _mm_setzero_si128();
+			cond->mask = 0;
 			*ptls = static_cast<u16>(cond_id);
 			return;
 		}
@@ -709,11 +696,7 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
 		});
 	}
 
-	static cond_handle*
-#ifdef _WIN32
-	__vectorcall
-#endif
-	cond_id_lock(u32 cond_id, u32 size, __m128i mask, u64 thread_id = 0, uptr iptr = 0)
+	static cond_handle* cond_id_lock(u32 cond_id, u32 size, u128 mask, u64 thread_id = 0, uptr iptr = 0)
 	{
 		if (cond_id - 1 < u32{UINT16_MAX})
 		{
@@ -740,7 +723,7 @@ cond_id_lock(u32 cond_id, u32 size, __m128i mask, u64 thread_id = 0, uptr iptr = 0)
 				return false;
 			}
 
-			const __m128i mask12 = _mm_and_si128(mask, _mm_load_si128(&cond->mask));
+			const u128 mask12 = mask & cond->mask;
 
 			if (thread_id)
 			{
@@ -749,7 +732,7 @@ cond_id_lock(u32 cond_id, u32 size, __m128i mask, u64 thread_id = 0, uptr iptr = 0)
 					return false;
 				}
 			}
-			else if (size && _mm_cvtsi128_si64(_mm_packs_epi16(mask12, mask12)) == 0)
+			else if (size && !mask12)
 			{
 				return false;
 			}
@@ -805,7 +788,7 @@ namespace
 		static void slot_free(uptr ptr, atomic_t<u16>* slot, u32 tls_slot) noexcept;
 
 		template <typename F>
-		static auto slot_search(uptr iptr, u32 size, u64 thread_id, __m128i mask, F func) noexcept;
+		static auto slot_search(uptr iptr, u32 size, u64 thread_id, u128 mask, F func) noexcept;
 	};
 
 	static_assert(sizeof(root_info) == 64);
@@ -991,7 +974,7 @@ void root_info::slot_free(uptr iptr, atomic_t<u16>* slot, u32 tls_slot) noexcept
 }
 
 template <typename F>
-FORCE_INLINE auto root_info::slot_search(uptr iptr, u32 size, u64 thread_id, __m128i mask, F func) noexcept
+FORCE_INLINE auto root_info::slot_search(uptr iptr, u32 size, u64 thread_id, u128 mask, F func) noexcept
 {
 	u32 index = 0;
 	u32 total = 0;
@@ -1041,11 +1024,7 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, u32 size, u64 thread_id, __m128i mask, F func) noexcept
 	}
 }
 
-SAFE_BUFFERS void
-#ifdef _WIN32
-__vectorcall
-#endif
-atomic_wait_engine::wait(const void* data, u32 size, __m128i old_value, u64 timeout, __m128i mask, atomic_wait::info* ext)
+SAFE_BUFFERS void atomic_wait_engine::wait(const void* data, u32 size, u128 old_value, u64 timeout, u128 mask, atomic_wait::info* ext)
 {
 	const auto stamp0 = atomic_wait::get_unique_tsc();
@@ -1300,11 +1279,7 @@ atomic_wait_engine::wait(const void* data, u32 size, __m128i old_value, u64 timeout, __m128i mask, atomic_wait::info* ext)
 }
 
 template <bool NoAlert = false>
-static u32
-#ifdef _WIN32
-__vectorcall
-#endif
-alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m128i phantom)
+static u32 alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, u128 mask, u128 phantom)
 {
 	ensure(cond_id);
@@ -1316,7 +1291,7 @@ alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m128i phantom)
 	{
 		// Redirect if necessary
 		const auto _old = cond;
-		const auto _new = _old->link ? cond_id_lock(_old->link, 0, _mm_set1_epi64x(-1)) : _old;
+		const auto _new = _old->link ? cond_id_lock(_old->link, 0, u128(-1)) : _old;
 
 		if (_new && _new->tsc0 == _old->tsc0)
 		{
@@ -1488,10 +1463,10 @@ bool atomic_wait_engine::raw_notify(const void* data, u64 thread_id)
 		u64 progress = 0;
 
-		root_info::slot_search(iptr, 0, thread_id, _mm_set1_epi64x(-1), [&](u32 cond_id)
+		root_info::slot_search(iptr, 0, thread_id, u128(-1), [&](u32 cond_id)
 		{
 			// Forced notification
-			if (alert_sema(cond_id, data, thread_id, 0, _mm_setzero_si128(), _mm_setzero_si128()))
+			if (alert_sema(cond_id, data, thread_id, 0, 0, 0))
 			{
 				if (s_tls_notify_cb)
 					s_tls_notify_cb(data, ++progress);
@@ -1514,11 +1489,7 @@ bool atomic_wait_engine::raw_notify(const void* data, u64 thread_id)
 	return progress != 0;
 }
 
-void
-#ifdef _WIN32
-__vectorcall
-#endif
-atomic_wait_engine::notify_one(const void* data, u32 size, __m128i mask, __m128i new_value)
+void atomic_wait_engine::notify_one(const void* data, u32 size, u128 mask, u128 new_value)
 {
 	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
@@ -1543,11 +1514,7 @@ atomic_wait_engine::notify_one(const void* data, u32 size, __m128i mask, __m128i new_value)
 		s_tls_notify_cb(data, -1);
 }
 
-SAFE_BUFFERS void
-#ifdef _WIN32
-__vectorcall
-#endif
-atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask)
+SAFE_BUFFERS void atomic_wait_engine::notify_all(const void* data, u32 size, u128 mask)
 {
 	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
@@ -1564,7 +1531,7 @@ atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask)
 	root_info::slot_search(iptr, size, 0, mask, [&](u32 cond_id)
 	{
-		u32 res = alert_sema<true>(cond_id, data, -1, size, mask, _mm_setzero_si128());
+		u32 res = alert_sema<true>(cond_id, data, -1, size, mask, 0);
 
 		if (res && ~res <= UINT16_MAX)
 		{

View file

@@ -124,32 +124,22 @@ namespace atomic_wait
 	} any_value;
 
 	template <typename X, typename T = decltype(std::declval<X>().observe())>
-	inline __m128i default_mask = sizeof(T) <= 8
-		? _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63))
-		: _mm_set1_epi64x(-1);
+	constexpr u128 default_mask = sizeof(T) <= 8 ? u128{UINT64_MAX >> ((64 - sizeof(T) * 8) & 63)} : u128(-1);
 
 	template <typename X, typename T = decltype(std::declval<X>().observe())>
-	constexpr __m128i get_value(X&, T value = T{}, ...)
+	constexpr u128 get_value(X&, T value = T{}, ...)
 	{
 		static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
 		static_assert(sizeof(T) <= 16);
-
-		if constexpr (sizeof(T) <= 8)
-		{
-			return _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>, T>(value));
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			return std::bit_cast<__m128i>(value);
-		}
+		return std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
 	}
 
 	struct info
 	{
 		const void* data;
 		u32 size;
-		__m128i old;
-		__m128i mask;
+		u128 old;
+		u128 mask;
 
 		template <typename X, typename T = decltype(std::declval<X>().observe())>
 		constexpr void set_value(X& a, T value = T{})
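Editor's note: get_value now folds both size branches into one statement — std::bit_cast to the same-size unsigned integer, then implicit widening to u128. For the 16-byte branch to keep working, get_uint_t<16> must denote the u128 type itself. A sketch of the mapping this relies on (the repository's real get_uint_t may be spelled differently):

#include <bit>
#include <cstddef>
#include <cstdint>

using u128 = unsigned __int128; // same assumption as above

// Illustrative size -> unsigned integer mapping.
template <std::size_t N> struct uint_for;
template <> struct uint_for<1>  { using type = uint8_t; };
template <> struct uint_for<2>  { using type = uint16_t; };
template <> struct uint_for<4>  { using type = uint32_t; };
template <> struct uint_for<8>  { using type = uint64_t; };
template <> struct uint_for<16> { using type = u128; };

template <std::size_t N>
using get_uint_t = typename uint_for<N>::type;

// Any trivially copyable T of power-of-two size <= 16 encodes the same way:
template <typename T>
constexpr u128 encode(T value)
{
    return std::bit_cast<get_uint_t<sizeof(T)>>(value); // widens to u128
}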
@@ -162,15 +152,7 @@ namespace atomic_wait
 		{
 			static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
 			static_assert(sizeof(T) <= 16);
-
-			if constexpr (sizeof(T) <= 8)
-			{
-				mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>, T>(value));
-			}
-			else if constexpr (sizeof(T) == 16)
-			{
-				mask = std::bit_cast<__m128i>(value);
-			}
+			mask = std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
 		}
 
 		template <typename X, typename T = decltype(std::declval<X>().observe())>
@@ -271,23 +253,9 @@ private:
 	template <uint Max, typename... T>
 	friend class atomic_wait::list;
 
-	static void
-#ifdef _WIN32
-	__vectorcall
-#endif
-	wait(const void* data, u32 size, __m128i old128, u64 timeout, __m128i mask128, atomic_wait::info* extension = nullptr);
-
-	static void
-#ifdef _WIN32
-	__vectorcall
-#endif
-	notify_one(const void* data, u32 size, __m128i mask128, __m128i val128);
-
-	static void
-#ifdef _WIN32
-	__vectorcall
-#endif
-	notify_all(const void* data, u32 size, __m128i mask128);
+	static void wait(const void* data, u32 size, u128 old128, u64 timeout, u128 mask128, atomic_wait::info* extension = nullptr);
+	static void notify_one(const void* data, u32 size, u128 mask128, u128 val128);
+	static void notify_all(const void* data, u32 size, u128 mask128);
 
 public:
 	static void set_wait_callback(bool(*cb)(const void* data, u64 attempts, u64 stamp0));
@@ -1528,107 +1496,51 @@ public:
 	template <atomic_wait::op Flags = atomic_wait::op::eq>
 	void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
 	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			const __m128i old = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(old_value));
-			const __m128i mask = _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63));
-			atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			const __m128i old = std::bit_cast<__m128i>(old_value);
-			atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), _mm_set1_epi64x(-1));
-		}
+		const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value);
+		const u128 mask = atomic_wait::default_mask<atomic_t>;
+		atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
 	}
 
 	// Overload with mask (only selected bits are checked), timeout is discouraged
 	template <atomic_wait::op Flags = atomic_wait::op::eq>
 	void wait(type old_value, type mask_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
 	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			const __m128i old = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(old_value));
-			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
-			atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			const __m128i old = std::bit_cast<__m128i>(old_value);
-			const __m128i mask = std::bit_cast<__m128i>(mask_value);
-			atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
-		}
+		const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value);
+		const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
+		atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
 	}
 
 	void notify_one() noexcept
 	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			atomic_wait_engine::notify_one(&m_data, -1, _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63)), _mm_setzero_si128());
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			atomic_wait_engine::notify_one(&m_data, -1, _mm_set1_epi64x(-1), _mm_setzero_si128());
-		}
+		atomic_wait_engine::notify_one(&m_data, -1, atomic_wait::default_mask<atomic_t>, 0);
 	}
 
 	// Notify with mask, allowing to not wake up thread which doesn't wait on this mask
 	void notify_one(type mask_value) noexcept
 	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
-			atomic_wait_engine::notify_one(&m_data, -1, mask, _mm_setzero_si128());
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			const __m128i mask = std::bit_cast<__m128i>(mask_value);
-			atomic_wait_engine::notify_one(&m_data, -1, mask, _mm_setzero_si128());
-		}
+		const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
+		atomic_wait_engine::notify_one(&m_data, -1, mask, 0);
 	}
 
 	// Notify with mask and value, allowing to not wake up thread which doesn't wait on them
 	[[deprecated("Incomplete")]] void notify_one(type mask_value, type phantom_value) noexcept
 	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
-			const __m128i _new = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(phantom_value));
-			atomic_wait_engine::notify_one(&m_data, sizeof(T), mask, _new);
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			const __m128i mask = std::bit_cast<__m128i>(mask_value);
-			const __m128i _new = std::bit_cast<__m128i>(phantom_value);
-			atomic_wait_engine::notify_one(&m_data, sizeof(T), mask, _new);
-		}
+		const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
+		const u128 _new = std::bit_cast<get_uint_t<sizeof(T)>>(phantom_value);
+		atomic_wait_engine::notify_one(&m_data, sizeof(T), mask, _new);
 	}
 
 	void notify_all() noexcept
 	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			atomic_wait_engine::notify_all(&m_data, -1, _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63)));
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			atomic_wait_engine::notify_all(&m_data, -1, _mm_set1_epi64x(-1));
-		}
+		atomic_wait_engine::notify_all(&m_data, -1, atomic_wait::default_mask<atomic_t>);
 	}
 
 	// Notify all threads with mask, allowing to not wake up threads which don't wait on them
 	void notify_all(type mask_value) noexcept
 	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
-			atomic_wait_engine::notify_all(&m_data, -1, mask);
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			const __m128i mask = std::bit_cast<__m128i>(mask_value);
-			atomic_wait_engine::notify_all(&m_data, -1, mask);
-		}
+		const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
+		atomic_wait_engine::notify_all(&m_data, -1, mask);
 	}
 };
 
 template <usz Align>
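Editor's note: with the per-size branching gone, every wait/notify call above funnels through the same u128 path. A hedged usage sketch, assuming the atomic_t interface exactly as declared in this hunk (the |= operator is assumed from elsewhere in the header, and the include path is illustrative):

#include "atomic.hpp" // repository header providing atomic_t (path assumed)

void waiter(atomic_t<u32>& flag)
{
    // Sleep while (flag & 1) == 0; only the masked bit is compared.
    flag.wait(0, 1u);
}

void notifier(atomic_t<u32>& flag)
{
    flag |= 1;           // publish the bit...
    flag.notify_one(1u); // ...and wake a waiter whose mask overlaps bit 0
}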
@@ -1724,5 +1636,5 @@ public:
 namespace atomic_wait
 {
 	template <usz Align>
-	inline __m128i default_mask<atomic_t<bool, Align>> = _mm_cvtsi32_si128(1);
+	constexpr u128 default_mask<atomic_t<bool, Align>> = 1;
 }

View file

@@ -1117,12 +1117,12 @@ namespace stx
 namespace atomic_wait
 {
 	template <typename T>
-	inline __m128i default_mask<stx::atomic_ptr<T>> = _mm_cvtsi64_si128(stx::c_ptr_mask);
+	constexpr u128 default_mask<stx::atomic_ptr<T>> = stx::c_ptr_mask;
 
 	template <typename T>
-	constexpr __m128i get_value(stx::atomic_ptr<T>&, const volatile void* value = nullptr)
+	constexpr u128 get_value(stx::atomic_ptr<T>&, const volatile void* value = nullptr)
 	{
-		return _mm_cvtsi64_si128(reinterpret_cast<uptr>(value) << stx::c_ref_size);
+		return reinterpret_cast<uptr>(value) << stx::c_ref_size;
 	}
 }
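Editor's note: here the waiter's value for an atomic_ptr is the pointer shifted up by stx::c_ref_size, and the default mask (stx::c_ptr_mask) covers only those upper bits — the low bits hold a reference counter whose churn should not wake waiters. A small sketch of that packing (constants illustrative; the real ones live in the stx headers):

#include <cstdint>

constexpr unsigned c_ref_size = 17;                         // low bits: ref count (illustrative)
constexpr uint64_t c_ptr_mask = ~uint64_t{0} << c_ref_size; // high bits: pointer

constexpr uint64_t pack(uint64_t ptr_bits, uint64_t refs)
{
    return (ptr_bits << c_ref_size) | refs;
}

// A waiter comparing under c_ptr_mask ignores reference-count changes and
// wakes only when the stored pointer itself changes:
static_assert((pack(0x1234, 5) & c_ptr_mask) == (pack(0x1234, 9) & c_ptr_mask));
static_assert((pack(0x1234, 5) & c_ptr_mask) != (pack(0x5678, 5) & c_ptr_mask));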