diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp
index 052e599748..f9484bd6d7 100644
--- a/rpcs3/util/atomic.cpp
+++ b/rpcs3/util/atomic.cpp
@@ -1350,7 +1350,7 @@ static u32
 #ifdef _WIN32
 __vectorcall
 #endif
-alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m128i new_value)
+alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m128i phantom)
 {
 	verify(HERE), cond_id;
 
@@ -1358,7 +1358,7 @@ alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m12
 
 	u32 ok = 0;
 
-	if (!size ? (!tid || cond->tid == tid) : cmp_mask(size, mask, new_value, cond->size | (cond->flag << 8), cond->mask, cond->oldv))
+	if (!size ? (!tid || cond->tid == tid) : cmp_mask(size, mask, phantom, cond->size | (cond->flag << 8), cond->mask, cond->oldv))
 	{
 		// Redirect if necessary
 		const auto _old = cond;
@@ -1599,7 +1599,7 @@ SAFE_BUFFERS void
 #ifdef _WIN32
 __vectorcall
 #endif
-atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask, __m128i new_value)
+atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask)
 {
 	const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data) & (~s_ref_mask >> 17);
 
@@ -1616,7 +1616,7 @@ atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask, __m128i
 
 	root_info::slot_search(iptr, size, 0, mask, [&](u32 cond_id)
 	{
-		u32 res = alert_sema(cond_id, data, -1, size, mask, new_value);
+		u32 res = alert_sema(cond_id, data, -1, size, mask, _mm_setzero_si128());
 
 		if (res <= UINT16_MAX)
 		{
diff --git a/rpcs3/util/atomic.hpp b/rpcs3/util/atomic.hpp
index b4f3598029..5035c8808b 100644
--- a/rpcs3/util/atomic.hpp
+++ b/rpcs3/util/atomic.hpp
@@ -234,7 +234,7 @@ private:
 #ifdef _WIN32
 	__vectorcall
 #endif
-	notify_all(const void* data, u32 size, __m128i mask128, __m128i val128);
+	notify_all(const void* data, u32 size, __m128i mask128);
 
 public:
 	static void set_wait_callback(bool(*cb)(const void* data, u64 attempts, u64 stamp0));
@@ -1504,18 +1504,18 @@ public:
 	}
 
 	// Notify with mask and value, allowing to not wake up thread which doesn't wait on them
-	void notify_one(type mask_value, type new_value) noexcept
+	[[deprecated("Incomplete")]] void notify_one(type mask_value, type phantom_value) noexcept
 	{
 		if constexpr (sizeof(T) <= 8)
 		{
 			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
-			const __m128i _new = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(new_value));
+			const __m128i _new = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(phantom_value));
 			atomic_wait_engine::notify_one(&m_data, sizeof(T), mask, _new);
 		}
 		else if constexpr (sizeof(T) == 16)
 		{
 			const __m128i mask = std::bit_cast<__m128i>(mask_value);
-			const __m128i _new = std::bit_cast<__m128i>(new_value);
+			const __m128i _new = std::bit_cast<__m128i>(phantom_value);
 			atomic_wait_engine::notify_one(&m_data, sizeof(T), mask, _new);
 		}
 	}
@@ -1524,11 +1524,11 @@ public:
 	{
 		if constexpr (sizeof(T) <= 8)
 		{
-			atomic_wait_engine::notify_all(&m_data, -1, _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63)), _mm_setzero_si128());
+			atomic_wait_engine::notify_all(&m_data, -1, _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63)));
 		}
 		else if constexpr (sizeof(T) == 16)
 		{
-			atomic_wait_engine::notify_all(&m_data, -1, _mm_set1_epi64x(-1), _mm_setzero_si128());
+			atomic_wait_engine::notify_all(&m_data, -1, _mm_set1_epi64x(-1));
 		}
 	}
 
@@ -1538,29 +1538,12 @@ public:
 		if constexpr (sizeof(T) <= 8)
 		{
 			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
-			atomic_wait_engine::notify_all(&m_data, -1, mask, _mm_setzero_si128());
+			atomic_wait_engine::notify_all(&m_data, -1, mask);
 		}
 		else if constexpr (sizeof(T) == 16)
 		{
 			const __m128i mask = std::bit_cast<__m128i>(mask_value);
-			atomic_wait_engine::notify_all(&m_data, -1, mask, _mm_setzero_si128());
-		}
-	}
-
-	// Notify all threads with mask and value, allowing to not wake up threads which don't wait on them
-	void notify_all(type mask_value, type new_value) noexcept
-	{
-		if constexpr (sizeof(T) <= 8)
-		{
-			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
-			const __m128i _new = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(new_value));
-			atomic_wait_engine::notify_all(&m_data, sizeof(T), mask, _new);
-		}
-		else if constexpr (sizeof(T) == 16)
-		{
-			const __m128i mask = std::bit_cast<__m128i>(mask_value);
-			const __m128i _new = std::bit_cast<__m128i>(new_value);
-			atomic_wait_engine::notify_all(&m_data, sizeof(T), mask, _new);
+			atomic_wait_engine::notify_all(&m_data, -1, mask);
 		}
 	}
 };
diff --git a/rpcs3/util/slow_mutex.hpp b/rpcs3/util/slow_mutex.hpp
index 9c5d8ba28e..36e716863b 100644
--- a/rpcs3/util/slow_mutex.hpp
+++ b/rpcs3/util/slow_mutex.hpp
@@ -18,16 +18,16 @@ public:
 	{
 		const u8 prev = m_value.fetch_op([](u8& val)
 		{
-			if (val == umax) [[unlikely]]
+			if ((val & 0x7f) == 0x7f) [[unlikely]]
 				return;
 
 			val++;
 		});
 
-		if (prev == umax) [[unlikely]]
+		if ((prev & 0x7f) == 0x7f) [[unlikely]]
 		{
 			// Keep trying until counter can be incremented
-			m_value.wait(0xff, 0x01);
+			m_value.wait(0x7f, 0x7f);
 		}
 		else if (prev == 0)
 		{
@@ -41,8 +41,9 @@ public:
 			}
 		}
 
-		// Wait for 7 bits to become 0, which could only mean one thing
-		m_value.wait(0, 0xfe);
+		// Wait for signal bit
+		m_value.wait(0, 0x80);
+		m_value &= ~0x80;
 	}
 
 	bool try_lock() noexcept
@@ -63,13 +64,17 @@ public:
 			fmt::raw_error("I tried to unlock unlocked mutex." HERE);
 		}
 
-		// Normal notify with forced value (ignoring real waiter count)
-		m_value.notify_one(0xfe, 0);
+		// Set signal and notify
+		if (prev & 0x7f)
+		{
+			m_value |= 0x80;
+			m_value.notify_one(0x80);
+		}
 
-		if (prev == umax) [[unlikely]]
+		if ((prev & 0x7f) == 0x7f) [[unlikely]]
 		{
 			// Overflow notify: value can be incremented
-			m_value.notify_one(0x01, 0);
+			m_value.notify_one(0x7f);
 		}
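Not part of the patch: a minimal caller-side sketch of what the overload removal means in practice. The variable `ready_flags` and the function `publish_bit0` are illustrative names only; the sketch assumes an `atomic_t`-style wrapper with the single-argument `notify_all(mask)` kept by this change.

```cpp
// Hypothetical caller (illustrative, not from the patch): with the (mask, value)
// overload of notify_all() removed, a waker passes only the mask of bits it
// changed; internally the engine now calls alert_sema() with a zero vector, so
// the wake decision comes from the mask/size comparison alone.
atomic_t<u32> ready_flags{0};

void publish_bit0()
{
	ready_flags |= 1u;

	// Previously possible: ready_flags.notify_all(1u, 1u);  // removed overload
	ready_flags.notify_all(1u); // remaining overload: wake only waiters masked on bit 0
}
```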