/* Patch summary: drops the v128 sync_* helpers from Utilities/BEType.h, adds an MSVC-only uint128_t struct plus uint128_t overloads of the sync_* helpers to Utilities/GNU.h, switches a _to_atomic_subtype specialization and the v128 overloads of write_relaxed/read_relaxed in atomic.h over to u128, and declares u128 in stdafx.h. NOTE(review): this copy has its line breaks collapsed and its template angle-bracket lists missing (e.g. "template inline if_integral_t ..."), so it cannot be applied or compiled as shown - confirm against the original commit. */
diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 39a9d01bf0..58f2904e31 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -386,62 +386,6 @@ inline v128 operator ~(const v128& other) return v128::from64(~other._u64[0], ~other._u64[1]); } -static force_inline v128 sync_val_compare_and_swap(volatile v128* dest, v128 comp, v128 exch) -{ -#if !defined(_MSC_VER) - auto res = __sync_val_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch); - return (v128&)res; -#else - _InterlockedCompareExchange128((volatile long long*)dest, exch._u64[1], exch._u64[0], (long long*)&comp); - return comp; -#endif -} - -static force_inline bool sync_bool_compare_and_swap(volatile v128* dest, v128 comp, v128 exch) -{ -#if !defined(_MSC_VER) - return __sync_bool_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch); -#else - return _InterlockedCompareExchange128((volatile long long*)dest, exch._u64[1], exch._u64[0], (long long*)&comp) != 0; -#endif -} - -static force_inline v128 sync_lock_test_and_set(volatile v128* dest, v128 value) -{ - while (true) - { - const v128 old = *(v128*)dest; - if (sync_bool_compare_and_swap(dest, old, value)) return old; - } -} - -static force_inline v128 sync_fetch_and_or(volatile v128* dest, v128 value) -{ - while (true) - { - const v128 old = *(v128*)dest; - if (sync_bool_compare_and_swap(dest, old, value | old)) return old; - } -} - -static force_inline v128 sync_fetch_and_and(volatile v128* dest, v128 value) -{ - while (true) - { - const v128 old = *(v128*)dest; - if (sync_bool_compare_and_swap(dest, old, value & old)) return old; - } -} - -static force_inline v128 sync_fetch_and_xor(volatile v128* dest, v128 value) -{ - while (true) - { - const v128 old = *(v128*)dest; - if (sync_bool_compare_and_swap(dest, old, value ^ old)) return old; - } -} - template struct se_t; template struct se_t diff --git a/Utilities/GNU.h b/Utilities/GNU.h index fd8a3117b1..26c3465deb 100644 --- 
a/Utilities/GNU.h +++ b/Utilities/GNU.h @@ -85,43 +85,85 @@ typedef int clockid_t; int clock_gettime(clockid_t clk_id, struct timespec *tp); #endif /* __APPLE__ */ +#endif /* __GNUG__ */ -template inline std::enable_if_t::value, T> sync_val_compare_and_swap(volatile T* dest, T2 comp, T2 exch) +#if defined(_MSC_VER) +// Unsigned 128-bit number implementation +struct alignas(16) uint128_t +{ + uint64_t lo, hi; + + uint128_t& operator ++() + { + if (!++lo) ++hi; /* lo wrapped to zero after the increment: carry into hi */ + return *this; + } + + uint128_t& operator --() + { + if (!lo--) hi--; /* lo was zero before the decrement: borrow from hi */ + return *this; + } + + uint128_t operator ++(int) + { + uint128_t value = *this; + if (!++lo) ++hi; + return value; + } + + uint128_t operator --(int) + { + uint128_t value = *this; + if (!lo--) hi--; + return value; + } +}; + +using __uint128_t = uint128_t; +#endif + +// SFINAE Helper type +template using if_integral_t = std::enable_if_t::value || std::is_same, __uint128_t>::value, TT>; + +#if defined(__GNUG__) + +template inline if_integral_t sync_val_compare_and_swap(volatile T* dest, T2 comp, T2 exch) { return __sync_val_compare_and_swap(dest, comp, exch); } -template inline std::enable_if_t::value, bool> sync_bool_compare_and_swap(volatile T* dest, T2 comp, T2 exch) +template inline if_integral_t sync_bool_compare_and_swap(volatile T* dest, T2 comp, T2 exch) { return __sync_bool_compare_and_swap(dest, comp, exch); } -template inline std::enable_if_t::value, T> sync_lock_test_and_set(volatile T* dest, T2 value) +template inline if_integral_t sync_lock_test_and_set(volatile T* dest, T2 value) { return __sync_lock_test_and_set(dest, value); } -template inline std::enable_if_t::value, T> sync_fetch_and_add(volatile T* dest, T2 value) +template inline if_integral_t sync_fetch_and_add(volatile T* dest, T2 value) { return __sync_fetch_and_add(dest, value); } -template inline std::enable_if_t::value, T> sync_fetch_and_sub(volatile T* dest, T2 value) +template inline if_integral_t sync_fetch_and_sub(volatile T* dest, T2 value) { return 
__sync_fetch_and_sub(dest, value); } -template inline std::enable_if_t::value, T> sync_fetch_and_or(volatile T* dest, T2 value) +template inline if_integral_t sync_fetch_and_or(volatile T* dest, T2 value) { return __sync_fetch_and_or(dest, value); } -template inline std::enable_if_t::value, T> sync_fetch_and_and(volatile T* dest, T2 value) +template inline if_integral_t sync_fetch_and_and(volatile T* dest, T2 value) { return __sync_fetch_and_and(dest, value); } -template inline std::enable_if_t::value, T> sync_fetch_and_xor(volatile T* dest, T2 value) +template inline if_integral_t sync_fetch_and_xor(volatile T* dest, T2 value) { return __sync_fetch_and_xor(dest, value); } @@ -152,6 +194,12 @@ inline uint64_t sync_val_compare_and_swap(volatile uint64_t* dest, uint64_t comp return _InterlockedCompareExchange64((volatile long long*)dest, exch, comp); } +inline uint128_t sync_val_compare_and_swap(volatile uint128_t* dest, uint128_t comp, uint128_t exch) +{ + _InterlockedCompareExchange128((volatile long long*)dest, exch.hi, exch.lo, (long long*)&comp); /* the intrinsic's return value is discarded and comp is returned instead; presumably the intrinsic overwrites comp with the previous contents of dest - TODO confirm against the MSVC intrinsic documentation */ + return comp; +} + inline bool sync_bool_compare_and_swap(volatile uint8_t* dest, uint8_t comp, uint8_t exch) { return (uint8_t)_InterlockedCompareExchange8((volatile char*)dest, exch, comp) == comp; @@ -172,6 +220,11 @@ inline bool sync_bool_compare_and_swap(volatile uint64_t* dest, uint64_t comp, u return (uint64_t)_InterlockedCompareExchange64((volatile long long*)dest, exch, comp) == comp; } +inline bool sync_bool_compare_and_swap(volatile uint128_t* dest, uint128_t comp, uint128_t exch) +{ + return _InterlockedCompareExchange128((volatile long long*)dest, exch.hi, exch.lo, (long long*)&comp) != 0; +} + // atomic exchange functions inline uint8_t sync_lock_test_and_set(volatile uint8_t* dest, uint8_t value) @@ -194,6 +247,18 @@ inline uint64_t sync_lock_test_and_set(volatile uint64_t* dest, uint64_t value) return _InterlockedExchange64((volatile long long*)dest, value); } +inline uint128_t 
sync_lock_test_and_set(volatile uint128_t* dest, uint128_t value) +{ + while (true) + { + uint128_t old; + old.lo = dest->lo; + old.hi = dest->hi; /* the two halves are read separately; the loop repeats until sync_bool_compare_and_swap accepts this snapshot */ + + if (sync_bool_compare_and_swap(dest, old, value)) return old; + } +} + // atomic add functions inline uint8_t sync_fetch_and_add(volatile uint8_t* dest, uint8_t value) @@ -216,6 +281,22 @@ inline uint64_t sync_fetch_and_add(volatile uint64_t* dest, uint64_t value) return _InterlockedExchangeAdd64((volatile long long*)dest, value); } +inline uint128_t sync_fetch_and_add(volatile uint128_t* dest, uint128_t value) +{ + while (true) + { + uint128_t old; + old.lo = dest->lo; + old.hi = dest->hi; + + uint128_t _new; + _new.lo = old.lo + value.lo; + _new.hi = old.hi + value.hi + (_new.lo < value.lo); /* adds 1 to hi when the low-half sum wrapped around */ + + if (sync_bool_compare_and_swap(dest, old, _new)) return old; + } +} + // atomic sub functions inline uint8_t sync_fetch_and_sub(volatile uint8_t* dest, uint8_t value) @@ -238,6 +319,22 @@ inline uint64_t sync_fetch_and_sub(volatile uint64_t* dest, uint64_t value) return _InterlockedExchangeAdd64((volatile long long*)dest, -(long long)value); } +inline uint128_t sync_fetch_and_sub(volatile uint128_t* dest, uint128_t value) +{ + while (true) + { + uint128_t old; + old.lo = dest->lo; + old.hi = dest->hi; + + uint128_t _new; + _new.lo = old.lo - value.lo; + _new.hi = old.hi - value.hi - (old.lo < value.lo); /* subtracts 1 from hi when the low-half difference borrows */ + + if (sync_bool_compare_and_swap(dest, old, _new)) return old; + } +} + // atomic `bitwise or` functions inline uint8_t sync_fetch_and_or(volatile uint8_t* dest, uint8_t value) @@ -260,6 +357,22 @@ inline uint64_t sync_fetch_and_or(volatile uint64_t* dest, uint64_t value) return _InterlockedOr64((volatile long long*)dest, value); } +inline uint128_t sync_fetch_and_or(volatile uint128_t* dest, uint128_t value) +{ + while (true) + { + uint128_t old; + old.lo = dest->lo; + old.hi = dest->hi; + + uint128_t _new; + _new.lo = old.lo | value.lo; + _new.hi = old.hi | value.hi; + + if (sync_bool_compare_and_swap(dest, old, _new)) 
return old; + } +} + // atomic `bitwise and` functions inline uint8_t sync_fetch_and_and(volatile uint8_t* dest, uint8_t value) @@ -282,6 +395,22 @@ inline uint64_t sync_fetch_and_and(volatile uint64_t* dest, uint64_t value) return _InterlockedAnd64((volatile long long*)dest, value); } +inline uint128_t sync_fetch_and_and(volatile uint128_t* dest, uint128_t value) +{ + while (true) + { + uint128_t old; + old.lo = dest->lo; + old.hi = dest->hi; + + uint128_t _new; + _new.lo = old.lo & value.lo; + _new.hi = old.hi & value.hi; + + if (sync_bool_compare_and_swap(dest, old, _new)) return old; + } +} + // atomic `bitwise xor` functions inline uint8_t sync_fetch_and_xor(volatile uint8_t* dest, uint8_t value) @@ -304,6 +433,22 @@ inline uint64_t sync_fetch_and_xor(volatile uint64_t* dest, uint64_t value) return _InterlockedXor64((volatile long long*)dest, value); } +inline uint128_t sync_fetch_and_xor(volatile uint128_t* dest, uint128_t value) +{ + while (true) + { + uint128_t old; + old.lo = dest->lo; + old.hi = dest->hi; + + uint128_t _new; + _new.lo = old.lo ^ value.lo; + _new.hi = old.hi ^ value.hi; + + if (sync_bool_compare_and_swap(dest, old, _new)) return old; + } +} + #endif /* _MSC_VER */ inline uint32_t cntlz32(uint32_t arg) diff --git a/rpcs3/Emu/Memory/atomic.h b/rpcs3/Emu/Memory/atomic.h index 7e2634e908..a46f5aef55 100644 --- a/rpcs3/Emu/Memory/atomic.h +++ b/rpcs3/Emu/Memory/atomic.h @@ -27,7 +27,7 @@ template struct _to_atomic_subtype template struct _to_atomic_subtype { - using type = v128; + using type = u128; }; template using atomic_subtype_t = typename _to_atomic_subtype::type; @@ -127,7 +127,7 @@ private: data = value; } - force_inline static void write_relaxed(volatile v128& data, const v128& value) + force_inline static void write_relaxed(volatile u128& data, const u128& value) { sync_lock_test_and_set(&data, value); } @@ -137,9 +137,9 @@ private: return data; } - force_inline static v128 read_relaxed(const volatile v128& value) + force_inline 
static u128 read_relaxed(const volatile u128& value) { - return sync_val_compare_and_swap(const_cast(&value), {}, {}); + return sync_val_compare_and_swap(const_cast(&value), u128{0}, u128{0}); /* reads the value through a compare-and-swap whose comparand and replacement are both zero */ } public: @@ -240,8 +240,6 @@ public: } }; -template using if_integral_t = std::enable_if_t::value>; - template> inline T operator ++(_atomic_base& left) { return left.from_subtype(sync_fetch_and_add(&left.sub_data, 1) + 1); diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h index cd97eb20ee..9210dafc02 100644 --- a/rpcs3/stdafx.h +++ b/rpcs3/stdafx.h @@ -63,6 +63,10 @@ using s64 = std::int64_t; using f32 = float; using f64 = double; +using u128 = __uint128_t; + +CHECK_SIZE_ALIGN(u128, 16, 16); + // bool type replacement for PS3/PSV class b8 {