From adffd53dbdc86f54803e79dcf06004e688e3b19d Mon Sep 17 00:00:00 2001 From: Andrea Toska Date: Mon, 16 Sep 2024 12:40:38 +0200 Subject: [PATCH 1/4] boss: Fix BOSS not honoring the proxy_server setting (#1344) --- src/Cafe/IOSU/legacy/iosu_boss.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Cafe/IOSU/legacy/iosu_boss.cpp b/src/Cafe/IOSU/legacy/iosu_boss.cpp index 760e5b66..7ab25f68 100644 --- a/src/Cafe/IOSU/legacy/iosu_boss.cpp +++ b/src/Cafe/IOSU/legacy/iosu_boss.cpp @@ -137,6 +137,10 @@ namespace iosu this->task_settings.taskType = settings->taskType; curl = std::shared_ptr(curl_easy_init(), curl_easy_cleanup); + if(GetConfig().proxy_server.GetValue() != "") + { + curl_easy_setopt(curl.get(), CURLOPT_PROXY, GetConfig().proxy_server.GetValue().c_str()); + } } }; From 8508c625407e80a5a7fcb9cf02c5355d018ff64b Mon Sep 17 00:00:00 2001 From: capitalistspz Date: Tue, 17 Sep 2024 01:00:26 +0100 Subject: [PATCH 2/4] Various smaller code improvements (#1343) --- CMakeLists.txt | 2 +- src/Cafe/OS/libs/nn_olv/nn_olv_OfflineDB.cpp | 4 +- src/Cafe/OS/libs/nsyshid/Infinity.cpp | 6 +- src/Cafe/OS/libs/ntag/ntag.cpp | 2 +- src/Cafe/OS/libs/snd_core/ax_out.cpp | 8 +- src/Common/MemPtr.h | 160 ++++++++++++------- src/Common/precompiled.h | 48 +++--- src/gui/CemuApp.cpp | 3 + src/util/CMakeLists.txt | 1 - src/util/ThreadPool/ThreadPool.cpp | 0 src/util/containers/robin_hood.h | 2 +- src/util/crypto/crc32.cpp | 55 ++----- src/util/crypto/crc32.h | 4 +- 13 files changed, 153 insertions(+), 142 deletions(-) delete mode 100644 src/util/ThreadPool/ThreadPool.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 54e2012a..5b5cff6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,7 +222,7 @@ if (ENABLE_CUBEB) option(BUILD_TOOLS "" OFF) option(BUNDLE_SPEEX "" OFF) set(USE_WINMM OFF CACHE BOOL "") - add_subdirectory("dependencies/cubeb" EXCLUDE_FROM_ALL) + add_subdirectory("dependencies/cubeb" EXCLUDE_FROM_ALL SYSTEM) set_property(TARGET cubeb PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") add_library(cubeb::cubeb ALIAS cubeb) endif() diff --git a/src/Cafe/OS/libs/nn_olv/nn_olv_OfflineDB.cpp b/src/Cafe/OS/libs/nn_olv/nn_olv_OfflineDB.cpp index 309394e6..c87cbd39 100644 --- a/src/Cafe/OS/libs/nn_olv/nn_olv_OfflineDB.cpp +++ b/src/Cafe/OS/libs/nn_olv/nn_olv_OfflineDB.cpp @@ -112,7 +112,7 @@ namespace nn nnResult _Async_OfflineDB_DownloadPostDataListParam_DownloadPostDataList(coreinit::OSEvent* event, DownloadedTopicData* downloadedTopicData, DownloadedPostData* downloadedPostData, uint32be* postCountOut, uint32 maxCount, DownloadPostDataListParam* param) { - scope_exit _se([&](){coreinit::OSSignalEvent(event);}); + stdx::scope_exit _se([&](){coreinit::OSSignalEvent(event);}); uint64 titleId = CafeSystem::GetForegroundTitleId(); @@ -184,7 +184,7 @@ namespace nn nnResult _Async_OfflineDB_DownloadPostDataListParam_DownloadExternalImageData(coreinit::OSEvent* event, DownloadedDataBase* _this, void* imageDataOut, uint32be* imageSizeOut, uint32 maxSize) { - scope_exit _se([&](){coreinit::OSSignalEvent(event);}); + stdx::scope_exit _se([&](){coreinit::OSSignalEvent(event);}); if (!_this->TestFlags(_this, DownloadedDataBase::FLAGS::HAS_EXTERNAL_IMAGE)) return OLV_RESULT_MISSING_DATA; diff --git a/src/Cafe/OS/libs/nsyshid/Infinity.cpp b/src/Cafe/OS/libs/nsyshid/Infinity.cpp index ab44ef4a..ac793109 100644 --- a/src/Cafe/OS/libs/nsyshid/Infinity.cpp +++ b/src/Cafe/OS/libs/nsyshid/Infinity.cpp @@ -1017,11 +1017,7 @@ namespace nsyshid std::array InfinityUSB::GenerateInfinityFigureKey(const std::vector& sha1Data) { std::array digest = {}; - SHA_CTX ctx; - SHA1_Init(&ctx); - SHA1_Update(&ctx, sha1Data.data(), sha1Data.size()); - SHA1_Final(digest.data(), &ctx); - OPENSSL_cleanse(&ctx, sizeof(ctx)); + SHA1(sha1Data.data(), sha1Data.size(), digest.data()); // Infinity AES keys are the first 16 bytes of the SHA1 Digest, every set of 4 bytes need to be // reversed due to endianness std::array key = {}; diff --git a/src/Cafe/OS/libs/ntag/ntag.cpp b/src/Cafe/OS/libs/ntag/ntag.cpp index 24617791..7c95a1a1 100644 --- a/src/Cafe/OS/libs/ntag/ntag.cpp +++ b/src/Cafe/OS/libs/ntag/ntag.cpp @@ -509,7 +509,7 @@ namespace ntag noftHeader->writeCount = _swapEndianU16(_swapEndianU16(noftHeader->writeCount) + 1); } - memcpy(decryptedBuffer + 0x20, noftHeader, sizeof(noftHeader)); + memcpy(decryptedBuffer + 0x20, noftHeader, sizeof(NTAGNoftHeader)); memcpy(decryptedBuffer + _swapEndianU16(rwHeader->offset), data, dataSize); // Encrypt diff --git a/src/Cafe/OS/libs/snd_core/ax_out.cpp b/src/Cafe/OS/libs/snd_core/ax_out.cpp index 68b05165..40b9c643 100644 --- a/src/Cafe/OS/libs/snd_core/ax_out.cpp +++ b/src/Cafe/OS/libs/snd_core/ax_out.cpp @@ -522,10 +522,10 @@ namespace snd_core // called periodically to check for AX updates void AXOut_update() { - constexpr auto kTimeout = std::chrono::duration_cast(std::chrono::milliseconds(((IAudioAPI::kBlockCount * 3) / 4) * (AX_FRAMES_PER_GROUP * 3))); - constexpr auto kWaitDuration = std::chrono::duration_cast(std::chrono::milliseconds(3)); - constexpr auto kWaitDurationFast = std::chrono::duration_cast(std::chrono::microseconds(2900)); - constexpr auto kWaitDurationMinimum = std::chrono::duration_cast(std::chrono::microseconds(1700)); + constexpr static auto kTimeout = std::chrono::duration_cast(std::chrono::milliseconds(((IAudioAPI::kBlockCount * 3) / 4) * (AX_FRAMES_PER_GROUP * 3))); + constexpr static auto kWaitDuration = std::chrono::duration_cast(std::chrono::milliseconds(3)); + constexpr static auto kWaitDurationFast = std::chrono::duration_cast(std::chrono::microseconds(2900)); + constexpr static auto kWaitDurationMinimum = std::chrono::duration_cast(std::chrono::microseconds(1700)); // if we haven't buffered any blocks, we will wait less time than usual bool additional_blocks_required = false; diff --git a/src/Common/MemPtr.h b/src/Common/MemPtr.h index 142da7e4..7825e4d5 100644 --- a/src/Common/MemPtr.h +++ b/src/Common/MemPtr.h @@ -4,7 +4,7 @@ using MPTR = uint32; // generic address in PowerPC memory space -#define MPTR_NULL (0) +#define MPTR_NULL (0) using VAddr = uint32; // virtual address using PAddr = uint32; // physical address @@ -14,137 +14,175 @@ extern uint8* PPCInterpreterGetStackPointer(); extern uint8* PPCInterpreter_PushAndReturnStackPointer(sint32 offset); extern void PPCInterpreterModifyStackPointer(sint32 offset); -class MEMPTRBase {}; +class MEMPTRBase +{ +}; -template +template class MEMPTR : MEMPTRBase { -public: - constexpr MEMPTR() - : m_value(0) { } + public: + constexpr MEMPTR() noexcept + : m_value(0) {} - explicit constexpr MEMPTR(uint32 offset) - : m_value(offset) { } + explicit constexpr MEMPTR(uint32 offset) noexcept + : m_value(offset) {} - explicit constexpr MEMPTR(const uint32be& offset) - : m_value(offset) { } + explicit constexpr MEMPTR(const uint32be& offset) noexcept + : m_value(offset) {} - constexpr MEMPTR(std::nullptr_t) - : m_value(0) { } + constexpr MEMPTR(std::nullptr_t) noexcept + : m_value(0) {} - MEMPTR(T* ptr) + MEMPTR(T* ptr) noexcept { if (ptr == nullptr) m_value = 0; else - { - cemu_assert_debug((uint8*)ptr >= memory_base && (uint8*)ptr <= memory_base + 0x100000000); - m_value = (uint32)((uintptr_t)ptr - (uintptr_t)memory_base); - } + { + cemu_assert_debug((uint8*)ptr >= memory_base && (uint8*)ptr <= memory_base + 0x100000000); + m_value = (uint32)((uintptr_t)ptr - (uintptr_t)memory_base); + } } - constexpr MEMPTR(const MEMPTR& memptr) - : m_value(memptr.m_value) { } + constexpr MEMPTR(const MEMPTR&) noexcept = default; - constexpr MEMPTR& operator=(const MEMPTR& memptr) - { - m_value = memptr.m_value; - return *this; - } + constexpr MEMPTR& operator=(const MEMPTR&) noexcept = default; - constexpr MEMPTR& operator=(const uint32& offset) + constexpr MEMPTR& operator=(const uint32& offset) noexcept { m_value = offset; return *this; } - constexpr MEMPTR& operator=(const std::nullptr_t rhs) + constexpr MEMPTR& operator=(std::nullptr_t) noexcept { m_value = 0; return *this; } - MEMPTR& operator=(T* ptr) + MEMPTR& operator=(T* ptr) noexcept { - if (ptr == nullptr) + if (ptr == nullptr) m_value = 0; else - { - cemu_assert_debug((uint8*)ptr >= memory_base && (uint8*)ptr <= memory_base + 0x100000000); - m_value = (uint32)((uintptr_t)ptr - (uintptr_t)memory_base); - } + { + cemu_assert_debug((uint8*)ptr >= memory_base && (uint8*)ptr <= memory_base + 0x100000000); + m_value = (uint32)((uintptr_t)ptr - (uintptr_t)memory_base); + } return *this; } - bool atomic_compare_exchange(T* comparePtr, T* newPtr) + bool atomic_compare_exchange(T* comparePtr, T* newPtr) noexcept { MEMPTR mp_compare = comparePtr; MEMPTR mp_new = newPtr; - std::atomic* thisValueAtomic = (std::atomic*)&m_value; + auto* thisValueAtomic = reinterpret_cast*>(&m_value); return thisValueAtomic->compare_exchange_strong(mp_compare.m_value, mp_new.m_value); } - explicit constexpr operator bool() const noexcept { return m_value != 0; } - - constexpr operator T*() const noexcept { return GetPtr(); } // allow implicit cast to wrapped pointer type + explicit constexpr operator bool() const noexcept + { + return m_value != 0; + } + // allow implicit cast to wrapped pointer type + constexpr operator T*() const noexcept + { + return GetPtr(); + } - template - explicit operator MEMPTR() const { return MEMPTR(this->m_value); } + template + explicit operator MEMPTR() const noexcept + { + return MEMPTR(this->m_value); + } - MEMPTR operator+(const MEMPTR& ptr) { return MEMPTR(this->GetMPTR() + ptr.GetMPTR()); } - MEMPTR operator-(const MEMPTR& ptr) { return MEMPTR(this->GetMPTR() - ptr.GetMPTR()); } + MEMPTR operator+(const MEMPTR& ptr) noexcept + { + return MEMPTR(this->GetMPTR() + ptr.GetMPTR()); + } + MEMPTR operator-(const MEMPTR& ptr) noexcept + { + return MEMPTR(this->GetMPTR() - ptr.GetMPTR()); + } - MEMPTR operator+(sint32 v) + MEMPTR operator+(sint32 v) noexcept { // pointer arithmetic return MEMPTR(this->GetMPTR() + v * 4); } - MEMPTR operator-(sint32 v) + MEMPTR operator-(sint32 v) noexcept { // pointer arithmetic return MEMPTR(this->GetMPTR() - v * 4); } - MEMPTR& operator+=(sint32 v) + MEMPTR& operator+=(sint32 v) noexcept { m_value += v * sizeof(T); return *this; } - template - typename std::enable_if::value, Q>::type& - operator*() const { return *GetPtr(); } + template + std::enable_if_t, Q>& operator*() const noexcept + { + return *GetPtr(); + } - T* operator->() const { return GetPtr(); } + constexpr T* operator->() const noexcept + { + return GetPtr(); + } - template - typename std::enable_if::value, Q>::type& - operator[](int index) { return GetPtr()[index]; } + template + std::enable_if_t, Q>& operator[](int index) noexcept + { + return GetPtr()[index]; + } - T* GetPtr() const { return (T*)(m_value == 0 ? nullptr : memory_base + (uint32)m_value); } + T* GetPtr() const noexcept + { + return (T*)(m_value == 0 ? nullptr : memory_base + (uint32)m_value); + } - template - C* GetPtr() const { return (C*)(GetPtr()); } + template + C* GetPtr() const noexcept + { + return static_cast(GetPtr()); + } - constexpr uint32 GetMPTR() const { return m_value.value(); } - constexpr const uint32be& GetBEValue() const { return m_value; } + [[nodiscard]] constexpr uint32 GetMPTR() const noexcept + { + return m_value.value(); + } + [[nodiscard]] constexpr const uint32be& GetBEValue() const noexcept + { + return m_value; + } - constexpr bool IsNull() const { return m_value == 0; } + [[nodiscard]] constexpr bool IsNull() const noexcept + { + return m_value == 0; + } -private: + private: uint32be m_value; }; static_assert(sizeof(MEMPTR) == sizeof(uint32be)); +static_assert(std::is_trivially_copyable_v>); #include "StackAllocator.h" #include "SysAllocator.h" -template +template struct fmt::formatter> : formatter { - template - auto format(const MEMPTR& v, FormatContext& ctx) const -> format_context::iterator { return fmt::format_to(ctx.out(), "{:#x}", v.GetMPTR()); } + template + auto format(const MEMPTR& v, FormatContext& ctx) const -> format_context::iterator + { + return fmt::format_to(ctx.out(), "{:#x}", v.GetMPTR()); + } }; diff --git a/src/Common/precompiled.h b/src/Common/precompiled.h index 61707519..d4df4343 100644 --- a/src/Common/precompiled.h +++ b/src/Common/precompiled.h @@ -394,16 +394,10 @@ void vectorRemoveByIndex(std::vector& vec, const size_t index) vec.erase(vec.begin() + index); } -template -int match_any_of(T1 value, T2 compareTo) +template +bool match_any_of(T1&& value, Types&&... others) { - return value == compareTo; -} - -template -bool match_any_of(T1 value, T2 compareTo, Types&&... others) -{ - return value == compareTo || match_any_of(value, others...); + return ((value == others) || ...); } // we cache the frequency in a static variable @@ -501,13 +495,6 @@ bool future_is_ready(std::future& f) #endif } -// replace with std::scope_exit once available -struct scope_exit -{ - std::function f_; - explicit scope_exit(std::function f) noexcept : f_(std::move(f)) {} - ~scope_exit() { if (f_) f_(); } -}; // helper function to cast raw pointers to std::atomic // this is technically not legal but works on most platforms as long as alignment restrictions are met and the implementation of atomic doesnt come with additional members @@ -515,6 +502,8 @@ struct scope_exit template std::atomic* _rawPtrToAtomic(T* ptr) { + static_assert(sizeof(T) == sizeof(std::atomic)); + cemu_assert_debug((reinterpret_cast(ptr) % alignof(std::atomic)) == 0); return reinterpret_cast*>(ptr); } @@ -578,13 +567,34 @@ struct fmt::formatter> : fmt::formatter } }; -// useful C++23 stuff that isn't yet widely supported - -// std::to_underlying +// useful future C++ stuff namespace stdx { + // std::to_underlying template {} >> constexpr std::underlying_type_t to_underlying(EnumT e) noexcept { return static_cast>(e); }; + + // std::scope_exit + template + class scope_exit + { + Fn m_func; + bool m_released = false; + public: + explicit scope_exit(Fn&& f) noexcept + : m_func(std::forward(f)) + {} + ~scope_exit() + { + if (!m_released) m_func(); + } + scope_exit(scope_exit&& other) noexcept + : m_func(std::move(other.m_func)), m_released(std::exchange(other.m_released, true)) + {} + scope_exit(const scope_exit&) = delete; + scope_exit& operator=(scope_exit) = delete; + void release() { m_released = true;} + }; } \ No newline at end of file diff --git a/src/gui/CemuApp.cpp b/src/gui/CemuApp.cpp index f91c1e3a..50ff3b89 100644 --- a/src/gui/CemuApp.cpp +++ b/src/gui/CemuApp.cpp @@ -15,6 +15,9 @@ #if BOOST_OS_LINUX && HAS_WAYLAND #include "gui/helpers/wxWayland.h" #endif +#if __WXGTK__ +#include +#endif #include #include diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 5af88176..5ac5ebfd 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -50,7 +50,6 @@ add_library(CemuUtil MemMapper/MemMapper.h SystemInfo/SystemInfo.cpp SystemInfo/SystemInfo.h - ThreadPool/ThreadPool.cpp ThreadPool/ThreadPool.h tinyxml2/tinyxml2.cpp tinyxml2/tinyxml2.h diff --git a/src/util/ThreadPool/ThreadPool.cpp b/src/util/ThreadPool/ThreadPool.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/src/util/containers/robin_hood.h b/src/util/containers/robin_hood.h index 577521b1..4f76519f 100644 --- a/src/util/containers/robin_hood.h +++ b/src/util/containers/robin_hood.h @@ -194,7 +194,7 @@ namespace robin_hood { // workaround missing "is_trivially_copyable" in g++ < 5.0 // See https://stackoverflow.com/a/31798726/48181 -#if defined(__GNUC__) && __GNUC__ < 5 +#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__) # define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) #else # define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value diff --git a/src/util/crypto/crc32.cpp b/src/util/crypto/crc32.cpp index 52eb8a88..a5b37a5e 100644 --- a/src/util/crypto/crc32.cpp +++ b/src/util/crypto/crc32.cpp @@ -1,29 +1,6 @@ #include "crc32.h" -#if defined(_MSC_VER) || defined(__MINGW32__) -#define __LITTLE_ENDIAN 1234 -#define __BIG_ENDIAN 4321 -#define __BYTE_ORDER __LITTLE_ENDIAN - -#include -#ifdef __MINGW32__ -#define PREFETCH(location) __builtin_prefetch(location) -#else -#define PREFETCH(location) _mm_prefetch(location, _MM_HINT_T0) -#endif -#else -// defines __BYTE_ORDER as __LITTLE_ENDIAN or __BIG_ENDIAN -#include - -#ifdef __GNUC__ -#define PREFETCH(location) __builtin_prefetch(location) -#else - // no prefetching -#define PREFETCH(location) ; -#endif -#endif - -unsigned int Crc32Lookup[8][256] = +constexpr uint32 Crc32Lookup[8][256] = { { 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3, @@ -301,20 +278,7 @@ unsigned int Crc32Lookup[8][256] = } }; -/// swap endianess -static inline uint32_t swap(uint32_t x) -{ -#if defined(__GNUC__) || defined(__clang__) - return __builtin_bswap32(x); -#else - return (x >> 24) | - ((x >> 8) & 0x0000FF00) | - ((x << 8) & 0x00FF0000) | - (x << 24); -#endif -} - -unsigned int crc32_calc_slice_by_8(unsigned int previousCrc32, const void* data, int length) +uint32 crc32_calc_slice_by_8(uint32 previousCrc32, const void* data, size_t length) { uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF const uint32_t* current = (const uint32_t*)data; @@ -323,7 +287,7 @@ unsigned int crc32_calc_slice_by_8(unsigned int previousCrc32, const void* data, while (length >= 8) { if constexpr (std::endian::native == std::endian::big){ - uint32_t one = *current++ ^ swap(crc); + uint32_t one = *current++ ^ _swapEndianU32(crc); uint32_t two = *current++; crc = Crc32Lookup[0][two & 0xFF] ^ Crc32Lookup[1][(two >> 8) & 0xFF] ^ @@ -348,13 +312,14 @@ unsigned int crc32_calc_slice_by_8(unsigned int previousCrc32, const void* data, Crc32Lookup[7][one & 0xFF]; } else { - cemu_assert(false); + static_assert(std::endian::native == std::endian::big || std::endian::native == std::endian::little, + "Platform byte-order is unsupported"); } length -= 8; } - const uint8_t* currentChar = (const uint8_t*)current; + const uint8* currentChar = (const uint8*)current; // remaining 1 to 7 bytes (standard algorithm) while (length-- != 0) crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; @@ -362,20 +327,20 @@ unsigned int crc32_calc_slice_by_8(unsigned int previousCrc32, const void* data, return ~crc; // same as crc ^ 0xFFFFFFFF } -unsigned int crc32_calc(unsigned int c, const void* data, int length) +uint32 crc32_calc(uint32 c, const void* data, size_t length) { if (length >= 16) { return crc32_calc_slice_by_8(c, data, length); } - unsigned char* p = (unsigned char*)data; + const uint8* p = (const uint8*)data; if (length == 0) return c; c ^= 0xFFFFFFFF; while (length) { - unsigned char temp = *p; - temp ^= (unsigned char)c; + uint8 temp = *p; + temp ^= (uint8)c; c = (c >> 8) ^ Crc32Lookup[0][temp]; // next length--; diff --git a/src/util/crypto/crc32.h b/src/util/crypto/crc32.h index b8002261..2ab37376 100644 --- a/src/util/crypto/crc32.h +++ b/src/util/crypto/crc32.h @@ -1,8 +1,8 @@ #pragma once -unsigned int crc32_calc(unsigned int c, const void* data, int length); +uint32 crc32_calc(uint32 c, const void* data, size_t length); -inline unsigned int crc32_calc(const void* data, int length) +inline uint32 crc32_calc(const void* data, size_t length) { return crc32_calc(0, data, length); } \ No newline at end of file From 6dc73f5d797082c25a68ad162377077547948d26 Mon Sep 17 00:00:00 2001 From: Alexandre Bouvier Date: Thu, 3 Oct 2024 06:48:25 +0000 Subject: [PATCH 3/4] Add support for fmt 11 (#1366) --- src/Cemu/Logging/CemuLogging.h | 4 ++++ src/config/CemuConfig.h | 10 +++++----- src/gui/helpers/wxHelpers.h | 2 +- src/input/emulated/EmulatedController.h | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Cemu/Logging/CemuLogging.h b/src/Cemu/Logging/CemuLogging.h index a671ce51..5b2e5fa4 100644 --- a/src/Cemu/Logging/CemuLogging.h +++ b/src/Cemu/Logging/CemuLogging.h @@ -91,7 +91,11 @@ bool cemuLog_log(LogType type, std::basic_string formatStr, TArgs&&... args) else { const auto format_view = fmt::basic_string_view(formatStr); +#if FMT_VERSION >= 110000 + const auto text = fmt::vformat(format_view, fmt::make_format_args>(args...)); +#else const auto text = fmt::vformat(format_view, fmt::make_format_args>(args...)); +#endif cemuLog_log(type, std::basic_string_view(text.data(), text.size())); } return true; diff --git a/src/config/CemuConfig.h b/src/config/CemuConfig.h index e2fbb74c..2f22cd76 100644 --- a/src/config/CemuConfig.h +++ b/src/config/CemuConfig.h @@ -194,7 +194,7 @@ ENABLE_ENUM_ITERATORS(CrashDump, CrashDump::Disabled, CrashDump::Enabled); template <> struct fmt::formatter : formatter { template - auto format(const PrecompiledShaderOption c, FormatContext &ctx) { + auto format(const PrecompiledShaderOption c, FormatContext &ctx) const { string_view name; switch (c) { @@ -209,7 +209,7 @@ struct fmt::formatter : formatter { template <> struct fmt::formatter : formatter { template - auto format(const AccurateShaderMulOption c, FormatContext &ctx) { + auto format(const AccurateShaderMulOption c, FormatContext &ctx) const { string_view name; switch (c) { @@ -223,7 +223,7 @@ struct fmt::formatter : formatter { template <> struct fmt::formatter : formatter { template - auto format(const CPUMode c, FormatContext &ctx) { + auto format(const CPUMode c, FormatContext &ctx) const { string_view name; switch (c) { @@ -240,7 +240,7 @@ struct fmt::formatter : formatter { template <> struct fmt::formatter : formatter { template - auto format(const CPUModeLegacy c, FormatContext &ctx) { + auto format(const CPUModeLegacy c, FormatContext &ctx) const { string_view name; switch (c) { @@ -257,7 +257,7 @@ struct fmt::formatter : formatter { template <> struct fmt::formatter : formatter { template - auto format(const CafeConsoleRegion v, FormatContext &ctx) { + auto format(const CafeConsoleRegion v, FormatContext &ctx) const { string_view name; switch (v) { diff --git a/src/gui/helpers/wxHelpers.h b/src/gui/helpers/wxHelpers.h index fa135cf4..9e00bf48 100644 --- a/src/gui/helpers/wxHelpers.h +++ b/src/gui/helpers/wxHelpers.h @@ -8,7 +8,7 @@ template <> struct fmt::formatter : formatter { template - auto format(const wxString& str, FormatContext& ctx) + auto format(const wxString& str, FormatContext& ctx) const { return formatter::format(str.c_str().AsChar(), ctx); } diff --git a/src/input/emulated/EmulatedController.h b/src/input/emulated/EmulatedController.h index 907be07e..c5adf81e 100644 --- a/src/input/emulated/EmulatedController.h +++ b/src/input/emulated/EmulatedController.h @@ -127,7 +127,7 @@ using EmulatedControllerPtr = std::shared_ptr; template <> struct fmt::formatter : formatter { template - auto format(EmulatedController::Type v, FormatContext& ctx) { + auto format(EmulatedController::Type v, FormatContext& ctx) const { switch (v) { case EmulatedController::Type::VPAD: return formatter::format("Wii U Gamepad", ctx); From 3acd0c4f2ca328abe80f2d35fb20136e738c84ae Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Mon, 14 Oct 2024 14:03:36 +0200 Subject: [PATCH 4/4] Vulkan: Protect against uniform var ringbuffer overflow (#1378) --- .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 8 +- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 4 +- .../Renderer/Vulkan/VulkanRendererCore.cpp | 99 +++++++++++++------ 3 files changed, 75 insertions(+), 36 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 12d1d975..9ad2c5ca 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -1892,6 +1892,7 @@ void VulkanRenderer::ProcessFinishedCommandBuffers() if (fenceStatus == VK_SUCCESS) { ProcessDestructionQueue(); + m_uniformVarBufferReadIndex = m_cmdBufferUniformRingbufIndices[m_commandBufferSyncIndex]; m_commandBufferSyncIndex = (m_commandBufferSyncIndex + 1) % m_commandBuffers.size(); memoryManager->cleanupBuffers(m_countCommandBufferFinished); m_countCommandBufferFinished++; @@ -1985,6 +1986,7 @@ void VulkanRenderer::SubmitCommandBuffer(VkSemaphore signalSemaphore, VkSemaphor cemuLog_logDebug(LogType::Force, "Vulkan: Waiting for available command buffer..."); WaitForNextFinishedCommandBuffer(); } + m_cmdBufferUniformRingbufIndices[nextCmdBufferIndex] = m_cmdBufferUniformRingbufIndices[m_commandBufferIndex]; m_commandBufferIndex = nextCmdBufferIndex; @@ -3562,13 +3564,13 @@ void VulkanRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, switch (shaderType) { case LatteConst::ShaderType::Vertex: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].unformBufferOffset[bufferIndex] = offset; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].uniformBufferOffset[bufferIndex] = offset; break; case LatteConst::ShaderType::Geometry: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].unformBufferOffset[bufferIndex] = offset; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].uniformBufferOffset[bufferIndex] = offset; break; case LatteConst::ShaderType::Pixel: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = offset; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].uniformBufferOffset[bufferIndex] = offset; break; default: cemu_assert_debug(false); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index ce97b5e9..52c1c6ed 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -591,6 +591,7 @@ private: bool m_uniformVarBufferMemoryIsCoherent{false}; uint8* m_uniformVarBufferPtr = nullptr; uint32 m_uniformVarBufferWriteIndex = 0; + uint32 m_uniformVarBufferReadIndex = 0; // transform feedback ringbuffer VkBuffer m_xfbRingBuffer = VK_NULL_HANDLE; @@ -637,6 +638,7 @@ private: size_t m_commandBufferIndex = 0; // current buffer being filled size_t m_commandBufferSyncIndex = 0; // latest buffer that finished execution (updated on submit) size_t m_commandBufferIDOfPrevFrame = 0; + std::array m_cmdBufferUniformRingbufIndices {}; // index in the uniform ringbuffer std::array m_cmd_buffer_fences; std::array m_commandBuffers; std::array m_commandBufferSemaphores; @@ -659,7 +661,7 @@ private: uint32 uniformVarBufferOffset[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT]; struct { - uint32 unformBufferOffset[LATTE_NUM_MAX_UNIFORM_BUFFERS]; + uint32 uniformBufferOffset[LATTE_NUM_MAX_UNIFORM_BUFFERS]; }shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT]; }dynamicOffsetInfo{}; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 6500f7d3..3a684072 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -375,24 +375,20 @@ float s_vkUniformData[512 * 4]; void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, LatteDecompilerShader* shader) { - auto GET_UNIFORM_DATA_PTR = [&](size_t index) { return s_vkUniformData + (index / 4); }; + auto GET_UNIFORM_DATA_PTR = [](size_t index) { return s_vkUniformData + (index / 4); }; sint32 shaderAluConst; - sint32 shaderUniformRegisterOffset; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: shaderAluConst = 0x400; - shaderUniformRegisterOffset = mmSQ_VTX_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Pixel: shaderAluConst = 0; - shaderUniformRegisterOffset = mmSQ_PS_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Geometry: shaderAluConst = 0; // geometry shader has no ALU const - shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START; break; default: UNREACHABLE; @@ -445,7 +441,7 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt } if (shader->uniform.loc_verticesPerInstance >= 0) { - *(int*)(s_vkUniformData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance; + *(int*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_verticesPerInstance) = m_streamoutState.verticesPerInstance; for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++) { if (shader->uniform.loc_streamoutBufferBase[b] >= 0) @@ -455,26 +451,63 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt } } // upload - if ((m_uniformVarBufferWriteIndex + shader->uniform.uniformRangeSize + 1024) > UNIFORMVAR_RINGBUFFER_SIZE) + const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; + const uint32 uniformSize = (shader->uniform.uniformRangeSize + bufferAlignmentM1) & ~bufferAlignmentM1; + + auto waitWhileCondition = [&](std::function condition) { + while (condition()) + { + if (m_commandBufferSyncIndex == m_commandBufferIndex) + { + if (m_cmdBufferUniformRingbufIndices[m_commandBufferIndex] != m_uniformVarBufferReadIndex) + { + draw_endRenderPass(); + SubmitCommandBuffer(); + } + else + { + // submitting work would not change readIndex, so there's no way for conditions based on it to change + cemuLog_log(LogType::Force, "draw call overflowed and corrupted uniform ringbuffer. expect visual corruption"); + cemu_assert_suspicious(); + break; + } + } + WaitForNextFinishedCommandBuffer(); + } + }; + + // wrap around if it doesnt fit consecutively + if (m_uniformVarBufferWriteIndex + uniformSize > UNIFORMVAR_RINGBUFFER_SIZE) { + waitWhileCondition([&]() { + return m_uniformVarBufferReadIndex > m_uniformVarBufferWriteIndex || m_uniformVarBufferReadIndex == 0; + }); m_uniformVarBufferWriteIndex = 0; } - uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; + + auto ringBufRemaining = [&]() { + ssize_t ringBufferUsedBytes = (ssize_t)m_uniformVarBufferWriteIndex - m_uniformVarBufferReadIndex; + if (ringBufferUsedBytes < 0) + ringBufferUsedBytes += UNIFORMVAR_RINGBUFFER_SIZE; + return UNIFORMVAR_RINGBUFFER_SIZE - 1 - ringBufferUsedBytes; + }; + waitWhileCondition([&]() { + return ringBufRemaining() < uniformSize; + }); + const uint32 uniformOffset = m_uniformVarBufferWriteIndex; memcpy(m_uniformVarBufferPtr + uniformOffset, s_vkUniformData, shader->uniform.uniformRangeSize); - m_uniformVarBufferWriteIndex += shader->uniform.uniformRangeSize; - m_uniformVarBufferWriteIndex = (m_uniformVarBufferWriteIndex + bufferAlignmentM1) & ~bufferAlignmentM1; + m_uniformVarBufferWriteIndex += uniformSize; // update dynamic offset dynamicOffsetInfo.uniformVarBufferOffset[shaderStageIndex] = uniformOffset; // flush if not coherent if (!m_uniformVarBufferMemoryIsCoherent) { - uint32 nonCoherentAtomSizeM1 = m_featureControl.limits.nonCoherentAtomSize - 1; VkMappedMemoryRange flushedRange{}; flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; flushedRange.memory = m_uniformVarBufferMemory; flushedRange.offset = uniformOffset; - flushedRange.size = (shader->uniform.uniformRangeSize + nonCoherentAtomSizeM1) & ~nonCoherentAtomSizeM1; + flushedRange.size = uniformSize; vkFlushMappedMemoryRanges(m_logicalDevice, 1, &flushedRange); } } @@ -494,7 +527,7 @@ void VulkanRenderer::draw_prepareDynamicOffsetsForDescriptorSet(uint32 shaderSta { for (auto& itr : pipeline_info->dynamicOffsetInfo.list_uniformBuffers[shaderStageIndex]) { - dynamicOffsets[numDynOffsets] = dynamicOffsetInfo.shaderUB[shaderStageIndex].unformBufferOffset[itr]; + dynamicOffsets[numDynOffsets] = dynamicOffsetInfo.shaderUB[shaderStageIndex].uniformBufferOffset[itr]; numDynOffsets++; } } @@ -1357,6 +1390,24 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 return; } + // prepare streamout + m_streamoutState.verticesPerInstance = count; + LatteStreamout_PrepareDrawcall(count, instanceCount); + + // update uniform vars + LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); + LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); + LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); + + if (vertexShader) + uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX, vertexShader); + if (pixelShader) + uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT, pixelShader); + if (geometryShader) + uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY, geometryShader); + // store where the read pointer should go after command buffer execution + m_cmdBufferUniformRingbufIndices[m_commandBufferIndex] = m_uniformVarBufferWriteIndex; + // process index data const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); @@ -1410,22 +1461,6 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); } - // prepare streamout - m_streamoutState.verticesPerInstance = count; - LatteStreamout_PrepareDrawcall(count, instanceCount); - - // update uniform vars - LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); - LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); - LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); - - if (vertexShader) - uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX, vertexShader); - if (pixelShader) - uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT, pixelShader); - if (geometryShader) - uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY, geometryShader); - PipelineInfo* pipeline_info; if (!isFirst) @@ -1613,13 +1648,13 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader switch (shaderType) { case LatteConst::ShaderType::Vertex: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].uniformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; break; case LatteConst::ShaderType::Geometry: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].uniformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; break; case LatteConst::ShaderType::Pixel: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].uniformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; break; default: UNREACHABLE;